VirtualBox

Changeset 102634 in vbox


Timestamp:
Dec 18, 2023 2:24:23 PM
Author:
vboxsync
Message:

VMM/IEM: Split out the emitters for the built-in threaded functions into a separate file. bugref:10371

Location:
trunk/src/VBox/VMM
Files:
3 edited
1 copied

  • trunk/src/VBox/VMM/Makefile.kmk

    r102549 r102634  
    262262 ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
    263263  VBoxVMM_SOURCES += \
    264         VMMAll/IEMAllN8veRecompiler.cpp
     264        VMMAll/IEMAllN8veRecompiler.cpp \
     265        VMMAll/IEMAllN8veRecompBltIn.cpp
    265266  if "$(KBUILD_TARGET_ARCH)" == "arm64" && defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER) # temp hack
    266267   VBoxVMM_DEFS        += VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompBltIn.cpp

    r102630 r102634  
    11/* $Id$ */
    22/** @file
    3  * IEM - Native Recompiler
    4  *
    5  * Logging group IEM_RE_NATIVE assignments:
    6  *      - Level 1  (Log)  : ...
    7  *      - Flow  (LogFlow) : ...
    8  *      - Level 2  (Log2) : Details calls as they're recompiled.
    9  *      - Level 3  (Log3) : Disassemble native code after recompiling.
    10  *      - Level 4  (Log4) : ...
    11  *      - Level 5  (Log5) : ...
    12  *      - Level 6  (Log6) : ...
    13  *      - Level 7  (Log7) : ...
    14  *      - Level 8  (Log8) : ...
    15  *      - Level 9  (Log9) : ...
    16  *      - Level 10 (Log10): ...
    17  *      - Level 11 (Log11): Variable allocator.
    18  *      - Level 12 (Log12): Register allocator.
     3 * IEM - Native Recompiler, Emitters for Built-In Threaded Functions.
    194 */
    205
     
    5641#include <VBox/log.h>
    5742#include <VBox/err.h>
    58 #include <VBox/dis.h>
    5943#include <VBox/param.h>
    6044#include <iprt/assert.h>
    61 #include <iprt/heap.h>
    62 #include <iprt/mem.h>
    6345#include <iprt/string.h>
    6446#if   defined(RT_ARCH_AMD64)
     
    6850#endif
    6951
    70 #ifdef RT_OS_WINDOWS
    71 # include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
    72 extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
    73 extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
    74 #else
    75 # include <iprt/formats/dwarf.h>
    76 # if defined(RT_OS_DARWIN)
    77 #  include <libkern/OSCacheControl.h>
    78 #  define IEMNATIVE_USE_LIBUNWIND
    79 extern "C" void  __register_frame(const void *pvFde);
    80 extern "C" void  __deregister_frame(const void *pvFde);
    81 # else
    82 #  ifdef DEBUG_bird /** @todo not thread safe yet */
    83 #   define IEMNATIVE_USE_GDB_JIT
    84 #  endif
    85 #  ifdef IEMNATIVE_USE_GDB_JIT
    86 #   include <iprt/critsect.h>
    87 #   include <iprt/once.h>
    88 #   include <iprt/formats/elf64.h>
    89 #  endif
    90 extern "C" void  __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
    91 extern "C" void *__deregister_frame_info(void *pvBegin);           /* (returns pvObj from __register_frame_info call) */
    92 # endif
    93 #endif
    94 #ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    95 # include "/opt/local/include/capstone/capstone.h"
    96 #endif
    9752
    9853#include "IEMInline.h"
     
    10055#include "IEMN8veRecompiler.h"
    10156#include "IEMN8veRecompilerEmit.h"
    102 #include "IEMNativeFunctions.h"
    103 
    104 
    105 /*
    106  * Narrow down configs here to avoid wasting time on unused configs.
    107  * Note! Same checks in IEMAllThrdRecompiler.cpp.
    108  */
    109 
    110 #ifndef IEM_WITH_CODE_TLB
    111 # error The code TLB must be enabled for the recompiler.
    112 #endif
    113 
    114 #ifndef IEM_WITH_DATA_TLB
    115 # error The data TLB must be enabled for the recompiler.
    116 #endif
    117 
    118 #ifndef IEM_WITH_SETJMP
    119 # error The setjmp approach must be enabled for the recompiler.
    120 #endif
    121 
    122 /** @todo eliminate this clang build hack. */
    123 #if RT_CLANG_PREREQ(4, 0)
    124 # pragma GCC diagnostic ignored "-Wunused-function"
    125 #endif
     57
    12658
    12759
    12860/*********************************************************************************************************************************
    129 *   Internal Functions                                                                                                           *
     61*   TB Helper Functions                                                                                                          *
    13062*********************************************************************************************************************************/
    131 #ifdef VBOX_STRICT
    132 static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    133                                                 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
    134 static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
    135 #endif
    136 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    137 static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
    138 static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
    139 #endif
    140 DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
    141 DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
    142                                                             IEMNATIVEGSTREG enmGstReg, uint32_t off);
    143 
    144 
    145 /*********************************************************************************************************************************
    146 *   Executable Memory Allocator                                                                                                  *
    147 *********************************************************************************************************************************/
    148 /** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    149  * Use an alternative chunk sub-allocator that does not store internal data
    150  * in the chunk.
    151  *
    152  * Using the RTHeapSimple is not practical on newer darwin systems where
    153  * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
    154  * memory.  We would have to change the protection of the whole chunk for
    155  * every call to RTHeapSimple, which would be rather expensive.
    156  *
    157  * This alternative implementation lets us restrict page protection modifications
    158  * to the pages backing the executable memory we just allocated.
    159  */
    160 #define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    161 /** The chunk sub-allocation unit size in bytes. */
    162 #define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE      128
    163 /** The chunk sub-allocation unit size as a shift factor. */
    164 #define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT     7
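As a quick illustration (not part of the changeset itself) of how these two values are used by the allocation code further down, a request is rounded up to whole 128-byte sub-allocation units:

    /* Illustrative only:
       cReqUnits = (300 + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT
                 = (300 + 127) >> 7 = 3   ->  3 * 128 = 384 bytes reserved in the chunk bitmap. */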
    165 
    166 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    167 # ifdef IEMNATIVE_USE_GDB_JIT
    168 #   define IEMNATIVE_USE_GDB_JIT_ET_DYN
    169 
    170 /** GDB JIT: Code entry.   */
    171 typedef struct GDBJITCODEENTRY
    172 {
    173     struct GDBJITCODEENTRY *pNext;
    174     struct GDBJITCODEENTRY *pPrev;
    175     uint8_t                *pbSymFile;
    176     uint64_t                cbSymFile;
    177 } GDBJITCODEENTRY;
    178 
    179 /** GDB JIT: Actions. */
    180 typedef enum GDBJITACTIONS : uint32_t
    181 {
    182     kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
    183 } GDBJITACTIONS;
    184 
    185 /** GDB JIT: Descriptor. */
    186 typedef struct GDBJITDESCRIPTOR
    187 {
    188     uint32_t            uVersion;
    189     GDBJITACTIONS       enmAction;
    190     GDBJITCODEENTRY    *pRelevant;
    191     GDBJITCODEENTRY    *pHead;
    192     /** Our addition: */
    193     GDBJITCODEENTRY    *pTail;
    194 } GDBJITDESCRIPTOR;
    195 
    196 /** GDB JIT: Our simple symbol file data. */
    197 typedef struct GDBJITSYMFILE
    198 {
    199     Elf64_Ehdr          EHdr;
    200 #  ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
    201     Elf64_Shdr          aShdrs[5];
    202 #  else
    203     Elf64_Shdr          aShdrs[7];
    204     Elf64_Phdr          aPhdrs[2];
    205 #  endif
    206     /** The dwarf ehframe data for the chunk. */
    207     uint8_t             abEhFrame[512];
    208     char                szzStrTab[128];
    209     Elf64_Sym           aSymbols[3];
    210 #  ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    211     Elf64_Sym           aDynSyms[2];
    212     Elf64_Dyn           aDyn[6];
    213 #  endif
    214 } GDBJITSYMFILE;
    215 
    216 extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
    217 extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
    218 
    219 /** Init once for g_IemNativeGdbJitLock. */
    220 static RTONCE     g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
    221 /** Init once for the critical section. */
    222 static RTCRITSECT g_IemNativeGdbJitLock;
    223 
    224 /** GDB reads the info here. */
    225 GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
    226 
    227 /** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
    228 DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
    229 {
    230     ASMNopPause();
    231 }
    232 
    233 /** @callback_method_impl{FNRTONCE} */
    234 static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
    235 {
    236     RT_NOREF(pvUser);
    237     return RTCritSectInit(&g_IemNativeGdbJitLock);
    238 }
    239 
    240 
    241 # endif /* IEMNATIVE_USE_GDB_JIT */
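The types and globals above follow the standard GDB JIT interface; the code that actually links a chunk's in-memory symbol file into the descriptor list appears later in the file and is not part of this excerpt. Purely as an illustrative sketch of how such a registration typically looks under IEMNATIVE_USE_GDB_JIT (hypothetical helper, not VirtualBox code):

    /* Hypothetical sketch only: announce one GDBJITCODEENTRY to the debugger. */
    static void exampleGdbJitRegisterEntry(GDBJITCODEENTRY *pEntry)
    {
        RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
        RTCritSectEnter(&g_IemNativeGdbJitLock);

        /* Link the entry at the head of the doubly linked list. */
        pEntry->pPrev = NULL;
        pEntry->pNext = __jit_debug_descriptor.pHead;
        if (__jit_debug_descriptor.pHead)
            __jit_debug_descriptor.pHead->pPrev = pEntry;
        else
            __jit_debug_descriptor.pTail = pEntry;
        __jit_debug_descriptor.pHead = pEntry;

        /* Point GDB at the new entry and poke the breakpoint function it watches. */
        __jit_debug_descriptor.pRelevant = pEntry;
        __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
        __jit_debug_register_code();
        __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;

        RTCritSectLeave(&g_IemNativeGdbJitLock);
    }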
    242 
    243 /**
    244  * Per-chunk unwind info for non-windows hosts.
    245  */
    246 typedef struct IEMEXECMEMCHUNKEHFRAME
    247 {
    248 # ifdef IEMNATIVE_USE_LIBUNWIND
    249     /** The offset of the FDA into abEhFrame. */
    250     uintptr_t               offFda;
    251 # else
    252     /** 'struct object' storage area. */
    253     uint8_t                 abObject[1024];
    254 # endif
    255 #  ifdef IEMNATIVE_USE_GDB_JIT
    256 #   if 0
    257     /** The GDB JIT 'symbol file' data. */
    258     GDBJITSYMFILE           GdbJitSymFile;
    259 #   endif
    260     /** The GDB JIT list entry. */
    261     GDBJITCODEENTRY         GdbJitEntry;
    262 #  endif
    263     /** The dwarf ehframe data for the chunk. */
    264     uint8_t                 abEhFrame[512];
    265 } IEMEXECMEMCHUNKEHFRAME;
    267 /** Pointer to per-chunk unwind info for non-windows hosts. */
    267 typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
    268 #endif
    269 
    270 
    271 /**
     272  * A chunk of executable memory.
    273  */
    274 typedef struct IEMEXECMEMCHUNK
    275 {
    276 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    277     /** Number of free items in this chunk. */
    278     uint32_t                cFreeUnits;
    279     /** Hint where to start searching for free space in the allocation bitmap. */
    280     uint32_t                idxFreeHint;
    281 #else
    282     /** The heap handle. */
    283     RTHEAPSIMPLE            hHeap;
    284 #endif
    285     /** Pointer to the chunk. */
    286     void                   *pvChunk;
    287 #ifdef IN_RING3
    288     /**
    289      * Pointer to the unwind information.
    290      *
    291      * This is used during C++ throw and longjmp (windows and probably most other
    292      * platforms).  Some debuggers (windbg) make use of it as well.
    293      *
    294      * Windows: This is allocated from hHeap on windows because (at least for
    295      *          AMD64) the UNWIND_INFO structure address in the
    296      *          RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
    297      *
    298      * Others:  Allocated from the regular heap to avoid unnecessary executable data
    299      *          structures.  This points to an IEMEXECMEMCHUNKEHFRAME structure. */
    300     void                   *pvUnwindInfo;
    301 #elif defined(IN_RING0)
    302     /** Allocation handle. */
    303     RTR0MEMOBJ              hMemObj;
    304 #endif
    305 } IEMEXECMEMCHUNK;
    306 /** Pointer to a memory chunk. */
    307 typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
    308 
    309 
    310 /**
    311  * Executable memory allocator for the native recompiler.
    312  */
    313 typedef struct IEMEXECMEMALLOCATOR
    314 {
    315     /** Magic value (IEMEXECMEMALLOCATOR_MAGIC).  */
    316     uint32_t                uMagic;
    317 
    318     /** The chunk size. */
    319     uint32_t                cbChunk;
    320     /** The maximum number of chunks. */
    321     uint32_t                cMaxChunks;
    322     /** The current number of chunks. */
    323     uint32_t                cChunks;
    324     /** Hint where to start looking for available memory. */
    325     uint32_t                idxChunkHint;
    326     /** Statistics: Current number of allocations. */
    327     uint32_t                cAllocations;
    328 
    329     /** The total amount of memory available. */
    330     uint64_t                cbTotal;
    331     /** Total amount of free memory. */
    332     uint64_t                cbFree;
    333     /** Total amount of memory allocated. */
    334     uint64_t                cbAllocated;
    335 
    336 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    337     /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
    338      *
    339      * Since the chunk size is a power of two and the minimum chunk size is a lot
    340      * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
    341      * require a whole number of uint64_t elements in the allocation bitmap.  So,
    342      * for the sake of simplicity, they are allocated as one continuous chunk. */
    344     uint64_t               *pbmAlloc;
    345     /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
    346     uint32_t                cUnitsPerChunk;
    347     /** Number of bitmap elements per chunk (for quickly locating the bitmap
    348      * portion corresponding to an chunk). */
    349     uint32_t                cBitmapElementsPerChunk;
    350 #else
    351     /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
    352      * @{ */
    353     /** The size of the heap internal block header.   This is used to adjust the
    354      * request memory size to make sure there is exactly enough room for a header at
    355      * the end of the blocks we allocate before the next 64 byte alignment line. */
    356     uint32_t                cbHeapBlockHdr;
    357     /** The size of the initial heap allocation required to make sure the first
    358      *  allocation is correctly aligned. */
    359     uint32_t                cbHeapAlignTweak;
    360     /** The alignment tweak allocation address. */
    361     void                   *pvAlignTweak;
    362     /** @} */
    363 #endif
    364 
    365 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    366     /** Pointer to the array of unwind info running parallel to aChunks (same
    367      * allocation as this structure, located after the bitmaps).
    368      * (For Windows, the structures must reside in 32-bit RVA distance to the
    369      * actual chunk, so they are allocated off the chunk.) */
    370     PIEMEXECMEMCHUNKEHFRAME paEhFrames;
    371 #endif
    372 
    373     /** The allocation chunks. */
    374     RT_FLEXIBLE_ARRAY_EXTENSION
    375     IEMEXECMEMCHUNK         aChunks[RT_FLEXIBLE_ARRAY];
    376 } IEMEXECMEMALLOCATOR;
    377 /** Pointer to an executable memory allocator. */
    378 typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
    379 
    380 /** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
    381 #define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
    382 
    383 
    384 static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
    385 
    386 
    387 /**
    388  * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
    389  * the heap statistics.
    390  */
    391 static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
    392                                                uint32_t cbReq, uint32_t idxChunk)
    393 {
    394     pExecMemAllocator->cAllocations += 1;
    395     pExecMemAllocator->cbAllocated  += cbReq;
    396 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    397     pExecMemAllocator->cbFree       -= cbReq;
    398 #else
    399     pExecMemAllocator->cbFree       -= RT_ALIGN_32(cbReq, 64);
    400 #endif
    401     pExecMemAllocator->idxChunkHint  = idxChunk;
    402 
    403 #ifdef RT_OS_DARWIN
    404     /*
    405      * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
    406      * on darwin.  So, we mark the pages returned as read+write after alloc and
    407      * expect the caller to call iemExecMemAllocatorReadyForUse when done
    408      * writing to the allocation.
    409      *
    410      * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
    411      * for details.
    412      */
    413     /** @todo detect if this is necessary... it wasn't required on 10.15 or
    414      *        whatever older version it was. */
    415     int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
    416     AssertRC(rc);
    417 #endif
    418 
    419     return pvRet;
    420 }
    421 
    422 
    423 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    424 static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
    425                                                 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
    426 {
    427     /*
    428      * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
    429      */
    430     Assert(!(cToScan & 63));
    431     Assert(!(idxFirst & 63));
    432     Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
    433     pbmAlloc += idxFirst / 64;
    434 
    435     /*
     436      * Scan the bitmap for cReqUnits consecutive clear bits
    437      */
    438     /** @todo This can probably be done more efficiently for non-x86 systems. */
    439     int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
    440     while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
    441     {
    442         uint32_t idxAddBit = 1;
    443         while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
    444             idxAddBit++;
    445         if (idxAddBit >= cReqUnits)
    446         {
    447             ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
    448 
    449             PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
    450             pChunk->cFreeUnits -= cReqUnits;
    451             pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
    452 
    453             void * const pvRet  = (uint8_t *)pChunk->pvChunk
    454                                 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
    455 
    456             return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
    457                                                     cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
    458         }
    459 
    460         iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
    461     }
    462     return NULL;
    463 }
    464 #endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
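To make the scan above concrete, a small illustrative trace (not part of the changeset), assuming cReqUnits = 2 and the low byte of the allocation bitmap being 0x1b:

    /* Illustrative trace only:  bitmap low byte 0x1b = 0b00011011
       (units 0,1,3,4 allocated; 2 and 5..7 free), cReqUnits = 2:
         ASMBitFirstClear            -> iBit = 2
         ASMBitTest(bit 3) is set    -> run of 1 < 2, too short
         ASMBitNextClear(..., 2)     -> iBit = 5
         ASMBitTest(bit 6) is clear  -> run of 2 == cReqUnits
         ASMBitSetRange(pbm, 5, 7)   -> units 5 and 6 marked allocated, and
         pvRet = pvChunk + ((idxFirst + 5) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT). */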
    465 
    466 
    467 static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
    468 {
    469 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    470     /*
    471      * Figure out how much to allocate.
    472      */
    473     uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    474     if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
    475     {
    476         uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
    477         uint32_t const   idxHint  = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
    478         if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
    479         {
    480             void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
    481                                                              pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
    482             if (pvRet)
    483                 return pvRet;
    484         }
    485         return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
    486                                                   RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
    487                                                   cReqUnits, idxChunk);
    488     }
    489 #else
    490     void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
    491     if (pvRet)
    492         return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
    493 #endif
    494     return NULL;
    495 
    496 }
    497 
    498 
    499 /**
    500  * Allocates @a cbReq bytes of executable memory.
    501  *
    502  * @returns Pointer to the memory, NULL if out of memory or other problem
    503  *          encountered.
    504  * @param   pVCpu   The cross context virtual CPU structure of the calling
    505  *                  thread.
    506  * @param   cbReq   How many bytes are required.
    507  */
    508 static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
    509 {
    510     PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
    511     AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
    512     AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
    513 
    514 
    515     for (unsigned iIteration = 0;; iIteration++)
    516     {
    517         /*
    518          * Adjust the request size so it'll fit the allocator alignment/whatnot.
    519          *
    520          * For the RTHeapSimple allocator this means to follow the logic described
    521          * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
    522          * existing chunks if we think we've got sufficient free memory around.
    523          *
    524          * While for the alternative one we just align it up to a whole unit size.
    525          */
    526 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    527         cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    528 #else
    529         cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
    530 #endif
    531         if (cbReq <= pExecMemAllocator->cbFree)
    532         {
    533             uint32_t const cChunks      = pExecMemAllocator->cChunks;
    534             uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
    535             for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
    536             {
    537                 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
    538                 if (pvRet)
    539                     return pvRet;
    540             }
    541             for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
    542             {
    543                 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
    544                 if (pvRet)
    545                     return pvRet;
    546             }
    547         }
    548 
    549         /*
    550          * Can we grow it with another chunk?
    551          */
    552         if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
    553         {
    554             int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
    555             AssertLogRelRCReturn(rc, NULL);
    556 
    557             uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
    558             void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
    559             if (pvRet)
    560                 return pvRet;
    561             AssertFailed();
    562         }
    563 
    564         /*
    565          * Try prune native TBs once.
    566          */
    567         if (iIteration == 0)
    568             iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
    569         else
    570         {
    571             /** @todo stats...   */
    572             return NULL;
    573         }
    574     }
    575 
    576 }
    577 
    578 
    579 /** This is a hook that we may need later for changing memory protection back
    580  *  to readonly+exec */
    581 static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
    582 {
    583 #ifdef RT_OS_DARWIN
    584     /* See iemExecMemAllocatorAllocTailCode for the explanation. */
    585     int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
    586     AssertRC(rc); RT_NOREF(pVCpu);
    587 
    588     /*
    589      * Flush the instruction cache:
    590      *      https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
    591      */
    592     /* sys_dcache_flush(pv, cb); - not necessary */
    593     sys_icache_invalidate(pv, cb);
    594 #else
    595     RT_NOREF(pVCpu, pv, cb);
    596 #endif
    597 }
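Tying the pieces together, a minimal caller-side sketch (hypothetical; the real call sites live in the TB allocator and native recompiler, which are not part of this excerpt) would look roughly like this:

    /* Hypothetical sketch only: allocate executable memory, copy freshly emitted
       code into it, then flip it back to read+exec before running it. */
    static void *exampleCommitNativeCode(PVMCPUCC pVCpu, uint8_t const *pabCode, size_t cbCode)
    {
        void *pv = iemExecMemAllocatorAlloc(pVCpu, (uint32_t)cbCode);
        if (pv)
        {
            memcpy(pv, pabCode, cbCode);                        /* pages are read+write here (see the darwin note above) */
            iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode);  /* read+exec again; icache flushed on darwin/arm64 */
            /* ... run it; when the TB is retired: iemExecMemAllocatorFree(pVCpu, pv, cbCode); */
        }
        return pv;
    }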
    598 
    599 
    600 /**
    601  * Frees executable memory.
    602  */
    603 void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
    604 {
    605     PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
    606     Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
    607     Assert(pv);
    608 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    609     Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
    610 #else
    611     Assert(!((uintptr_t)pv & 63));
    612 #endif
    613 
    614     /* Align the size as we did when allocating the block. */
    615 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    616     cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    617 #else
    618     cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
    619 #endif
    620 
    621     /* Free it / assert sanity. */
    622 #if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
    623     uint32_t const cChunks = pExecMemAllocator->cChunks;
    624     uint32_t const cbChunk = pExecMemAllocator->cbChunk;
    625     bool           fFound  = false;
    626     for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
    627     {
    628         uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
    629         fFound = offChunk < cbChunk;
    630         if (fFound)
    631         {
    632 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    633             uint32_t const idxFirst  = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    634             uint32_t const cReqUnits = (uint32_t)cb       >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    635 
    636             /* Check that it's valid and free it. */
    637             uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
    638             AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
    639             for (uint32_t i = 1; i < cReqUnits; i++)
    640                 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
    641             ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
    642 
    643             pExecMemAllocator->aChunks[idxChunk].cFreeUnits  += cReqUnits;
    644             pExecMemAllocator->aChunks[idxChunk].idxFreeHint  = idxFirst;
    645 
    646             /* Update the stats. */
    647             pExecMemAllocator->cbAllocated  -= cb;
    648             pExecMemAllocator->cbFree       += cb;
    649             pExecMemAllocator->cAllocations -= 1;
    650             return;
    651 #else
    652             Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
    653             break;
    654 #endif
    655         }
    656     }
    657 # ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    658     AssertFailed();
    659 # else
    660     Assert(fFound);
    661 # endif
    662 #endif
    663 
    664 #ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    665     /* Update stats while cb is freshly calculated. */
    666     pExecMemAllocator->cbAllocated  -= cb;
    667     pExecMemAllocator->cbFree       += RT_ALIGN_Z(cb, 64);
    668     pExecMemAllocator->cAllocations -= 1;
    669 
    670     /* Free it. */
    671     RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
    672 #endif
    673 }
    674 
    675 
    676 
    677 #ifdef IN_RING3
    678 # ifdef RT_OS_WINDOWS
    679 
    680 /**
    681  * Initializes the unwind info structures for windows hosts.
    682  */
    683 static int
    684 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
    685                                                      void *pvChunk, uint32_t idxChunk)
    686 {
    687     RT_NOREF(pVCpu);
    688 
    689     /*
    690      * The AMD64 unwind opcodes.
    691      *
    692      * This is a program that starts with RSP after a RET instruction that
    693      * ends up in recompiled code, and the operations we describe here will
    694      * restore all non-volatile registers and bring RSP back to where our
    695      * RET address is.  This means it's reverse order from what happens in
    696      * the prologue.
    697      *
     698      * Note! Using a frame register approach here both because we have one
     699      *       and, mainly, because the UWOP_ALLOC_LARGE argument values
     700      *       would be a pain to write initializers for.  On the positive
     701      *       side, we're impervious to changes in the stack variable
     702      *       area and can deal with dynamic stack allocations if necessary.
    703      */
    704     static const IMAGE_UNWIND_CODE s_aOpcodes[] =
    705     {
    706         { { 16, IMAGE_AMD64_UWOP_SET_FPREG,     0 } },              /* RSP  = RBP - FrameOffset * 10 (0x60) */
    707         { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL,   0 } },              /* RSP += 8; */
    708         { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x15 } },   /* R15  = [RSP]; RSP += 8; */
    709         { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x14 } },   /* R14  = [RSP]; RSP += 8; */
    710         { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x13 } },   /* R13  = [RSP]; RSP += 8; */
    711         { {  8, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x12 } },   /* R12  = [RSP]; RSP += 8; */
    712         { {  7, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xDI } },   /* RDI  = [RSP]; RSP += 8; */
    713         { {  6, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xSI } },   /* RSI  = [RSP]; RSP += 8; */
    714         { {  5, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xBX } },   /* RBX  = [RSP]; RSP += 8; */
    715         { {  4, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xBP } },   /* RBP  = [RSP]; RSP += 8; */
    716     };
    717     union
    718     {
    719         IMAGE_UNWIND_INFO Info;
    720         uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
    721     } s_UnwindInfo =
    722     {
    723         {
    724             /* .Version = */        1,
    725             /* .Flags = */          0,
    726             /* .SizeOfProlog = */   16, /* whatever */
    727             /* .CountOfCodes = */   RT_ELEMENTS(s_aOpcodes),
    728             /* .FrameRegister = */  X86_GREG_xBP,
    729             /* .FrameOffset = */    (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
    730         }
    731     };
    732     AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
    733     AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
    734 
    735     /*
    736      * Calc how much space we need and allocate it off the exec heap.
    737      */
    738     unsigned const cFunctionEntries = 1;
    739     unsigned const cbUnwindInfo     = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
    740     unsigned const cbNeeded         = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
    741 #  ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    742     unsigned const cbNeededAligned  = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    743     PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
    744         = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
    745 #  else
    746     unsigned const cbNeededAligned  = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
    747                                     - pExecMemAllocator->cbHeapBlockHdr;
    748     PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
    749                                                                                                        32 /*cbAlignment*/);
    750 #  endif
    751     AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
    752     pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
    753 
    754     /*
    755      * Initialize the structures.
    756      */
    757     PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
    758 
    759     paFunctions[0].BeginAddress         = 0;
    760     paFunctions[0].EndAddress           = pExecMemAllocator->cbChunk;
    761     paFunctions[0].UnwindInfoAddress    = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
    762 
    763     memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
    764     memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
    765 
    766     /*
    767      * Register it.
    768      */
    769     uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
    770     AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
    771 
    772     return VINF_SUCCESS;
    773 }
    774 
    775 
    776 # else /* !RT_OS_WINDOWS */
    777 
    778 /**
    779  * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
    780  */
    781 DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
    782 {
    783     if (iValue >= 64)
    784     {
    785         Assert(iValue < 0x2000);
    786         *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
    787         *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
    788     }
    789     else if (iValue >= 0)
    790         *Ptr.pb++ = (uint8_t)iValue;
    791     else if (iValue > -64)
    792         *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
    793     else
    794     {
    795         Assert(iValue > -0x2000);
    796         *Ptr.pb++ = ((uint8_t)iValue & 0x7f)        | 0x80;
    797         *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
    798     }
    799     return Ptr;
    800 }
    801 
    802 
    803 /**
    804  * Emits an ULEB128 encoded value (up to 64-bit wide).
    805  */
    806 DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
    807 {
    808     while (uValue >= 0x80)
    809     {
    810         *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
    811         uValue  >>= 7;
    812     }
    813     *Ptr.pb++ = (uint8_t)uValue;
    814     return Ptr;
    815 }
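A few byte-level encodings produced by the two emitters above, worked out from the code (illustrative only):

    /* Illustrative encodings:
         iemDwarfPutLeb128(Ptr, -8)     -> 0x78        (single byte, sign bit 0x40 set)
         iemDwarfPutLeb128(Ptr, 300)    -> 0xAC 0x02   ((300 & 0x7f) | 0x80, then 300 >> 7 = 2)
         iemDwarfPutUleb128(Ptr, 300)   -> 0xAC 0x02
         iemDwarfPutUleb128(Ptr, 0x7f)  -> 0x7f        (below 0x80, no continuation byte) */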
    816 
    817 
    818 /**
    819  * Emits a CFA rule as register @a uReg + offset @a off.
    820  */
    821 DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
    822 {
    823     *Ptr.pb++ = DW_CFA_def_cfa;
    824     Ptr = iemDwarfPutUleb128(Ptr, uReg);
    825     Ptr = iemDwarfPutUleb128(Ptr, off);
    826     return Ptr;
    827 }
    828 
    829 
    830 /**
    831  * Emits a register (@a uReg) save location:
    832  *      CFA + @a off * data_alignment_factor
    833  */
    834 DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
    835 {
    836     if (uReg < 0x40)
    837         *Ptr.pb++ = DW_CFA_offset | uReg;
    838     else
    839     {
    840         *Ptr.pb++ = DW_CFA_offset_extended;
    841         Ptr = iemDwarfPutUleb128(Ptr, uReg);
    842     }
    843     Ptr = iemDwarfPutUleb128(Ptr, off);
    844     return Ptr;
    845 }
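For example (illustrative, taken from the AMD64 CIE emitted further down), with RBP being DWARF register 6 and the CIE's data alignment factor set to -8:

    /* Illustrative encoding (DWREG_AMD64_RBP is DWARF register 6):
         iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2)  ->  0x86 0x02   (DW_CFA_offset | 6, ULEB128 offset 2)
         meaning: RBP was saved at CFA + 2 * data_alignment_factor(-8) = CFA - 16. */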
    846 
    847 
    848 #  if 0 /* unused */
    849 /**
    850  * Emits a register (@a uReg) save location, using signed offset:
    851  *      CFA + @a offSigned * data_alignment_factor
    852  */
    853 DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
    854 {
    855     *Ptr.pb++ = DW_CFA_offset_extended_sf;
    856     Ptr = iemDwarfPutUleb128(Ptr, uReg);
    857     Ptr = iemDwarfPutLeb128(Ptr, offSigned);
    858     return Ptr;
    859 }
    860 #  endif
    861 
    862 
    863 /**
    864  * Initializes the unwind info section for non-windows hosts.
    865  */
    866 static int
    867 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
    868                                                      void *pvChunk, uint32_t idxChunk)
    869 {
    870     PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
    871     pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
    872 
    873     RTPTRUNION Ptr = { pEhFrame->abEhFrame };
    874 
    875     /*
    876      * Generate the CIE first.
    877      */
    878 #  ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
    879     uint8_t const iDwarfVer = 3;
    880 #  else
    881     uint8_t const iDwarfVer = 4;
    882 #  endif
    883     RTPTRUNION const PtrCie = Ptr;
    884     *Ptr.pu32++ = 123;                                      /* The CIE length will be determined later. */
    885     *Ptr.pu32++ = 0 /*UINT32_MAX*/;                         /* I'm a CIE in .eh_frame speak. */
    886     *Ptr.pb++   = iDwarfVer;                                /* DWARF version */
    887     *Ptr.pb++   = 0;                                        /* Augmentation. */
    888     if (iDwarfVer >= 4)
    889     {
    890         *Ptr.pb++   = sizeof(uintptr_t);                    /* Address size. */
    891         *Ptr.pb++   = 0;                                    /* Segment selector size. */
    892     }
    893 #  ifdef RT_ARCH_AMD64
    894     Ptr = iemDwarfPutLeb128(Ptr, 1);                        /* Code alignment factor (LEB128 = 1). */
    895 #  else
    896     Ptr = iemDwarfPutLeb128(Ptr, 4);                        /* Code alignment factor (LEB128 = 4). */
    897 #  endif
    898     Ptr = iemDwarfPutLeb128(Ptr, -8);                       /* Data alignment factor (LEB128 = -8). */
    899 #  ifdef RT_ARCH_AMD64
    900     Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA);          /* Return address column (ULEB128) */
    901 #  elif defined(RT_ARCH_ARM64)
    902     Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR);          /* Return address column (ULEB128) */
    903 #  else
    904 #   error "port me"
    905 #  endif
    906     /* Initial instructions: */
    907 #  ifdef RT_ARCH_AMD64
    908     Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16);   /* CFA     = RBP + 0x10 - first stack parameter */
    909     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,  1);    /* Ret RIP = [CFA + 1*-8] */
    910     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2);    /* RBP     = [CFA + 2*-8] */
    911     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3);    /* RBX     = [CFA + 3*-8] */
    912     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4);    /* R12     = [CFA + 4*-8] */
    913     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5);    /* R13     = [CFA + 5*-8] */
    914     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6);    /* R14     = [CFA + 6*-8] */
    915     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7);    /* R15     = [CFA + 7*-8] */
    916 #  elif defined(RT_ARCH_ARM64)
    917 #   if 1
    918     Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP,  16);   /* CFA     = BP + 0x10 - first stack parameter */
    919 #   else
    920     Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP,  IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
    921 #   endif
    922     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR,   1);   /* Ret PC  = [CFA + 1*-8] */
    923     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP,   2);   /* Ret BP  = [CFA + 2*-8] */
    924     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28,  3);   /* X28     = [CFA + 3*-8] */
    925     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27,  4);   /* X27     = [CFA + 4*-8] */
    926     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26,  5);   /* X26     = [CFA + 5*-8] */
    927     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25,  6);   /* X25     = [CFA + 6*-8] */
    928     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24,  7);   /* X24     = [CFA + 7*-8] */
    929     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23,  8);   /* X23     = [CFA + 8*-8] */
    930     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22,  9);   /* X22     = [CFA + 9*-8] */
    931     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10);   /* X21     = [CFA +10*-8] */
    932     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11);   /* X20     = [CFA +11*-8] */
    933     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12);   /* X19     = [CFA +12*-8] */
    934     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    935     /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE? */
    936 #  else
    937 #   error "port me"
    938 #  endif
    939     while ((Ptr.u - PtrCie.u) & 3)
    940         *Ptr.pb++ = DW_CFA_nop;
    941     /* Finalize the CIE size. */
    942     *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
    943 
    944     /*
    945      * Generate an FDE for the whole chunk area.
    946      */
    947 #  ifdef IEMNATIVE_USE_LIBUNWIND
    948     pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
    949 #  endif
    950     RTPTRUNION const PtrFde = Ptr;
    951     *Ptr.pu32++ = 123;                                      /* The FDE length will be determined later. */
    952     *Ptr.pu32   = Ptr.u - PtrCie.u;                         /* Negated self relative CIE address. */
    953     Ptr.pu32++;
    954     *Ptr.pu64++ = (uintptr_t)pvChunk;                       /* Absolute start PC of this FDE. */
    955     *Ptr.pu64++ = pExecMemAllocator->cbChunk;               /* PC range length for this FDE. */
    956 #  if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
    957     *Ptr.pb++ = DW_CFA_nop;
    958 #  endif
    959     while ((Ptr.u - PtrFde.u) & 3)
    960         *Ptr.pb++ = DW_CFA_nop;
    961     /* Finalize the FDE size. */
    962     *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
    963 
    964     /* Terminator entry. */
    965     *Ptr.pu32++ = 0;
    966     *Ptr.pu32++ = 0;            /* just to be sure... */
    967     Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
    968 
    969     /*
    970      * Register it.
    971      */
    972 #  ifdef IEMNATIVE_USE_LIBUNWIND
    973     __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
    974 #  else
    975     memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
    976     __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
    977 #  endif
    978 
    979 #  ifdef IEMNATIVE_USE_GDB_JIT
    980     /*
    981      * Now for telling GDB about this (experimental).
    982      *
    983      * This seems to work best with ET_DYN.
    984      */
    985     unsigned const cbNeeded        = sizeof(GDBJITSYMFILE);
    986 #   ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    987     unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    988     GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
    989 #   else
    990     unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
    991                                    - pExecMemAllocator->cbHeapBlockHdr;
    992     GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
    993 #   endif
    994     AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
    995     unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
    996 
    997     RT_ZERO(*pSymFile);
    998 
    999     /*
    1000      * The ELF header:
    1001      */
    1002     pSymFile->EHdr.e_ident[0]           = ELFMAG0;
    1003     pSymFile->EHdr.e_ident[1]           = ELFMAG1;
    1004     pSymFile->EHdr.e_ident[2]           = ELFMAG2;
    1005     pSymFile->EHdr.e_ident[3]           = ELFMAG3;
    1006     pSymFile->EHdr.e_ident[EI_VERSION]  = EV_CURRENT;
    1007     pSymFile->EHdr.e_ident[EI_CLASS]    = ELFCLASS64;
    1008     pSymFile->EHdr.e_ident[EI_DATA]     = ELFDATA2LSB;
    1009     pSymFile->EHdr.e_ident[EI_OSABI]    = ELFOSABI_NONE;
    1010 #   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    1011     pSymFile->EHdr.e_type               = ET_DYN;
    1012 #   else
    1013     pSymFile->EHdr.e_type               = ET_REL;
    1014 #   endif
    1015 #   ifdef RT_ARCH_AMD64
    1016     pSymFile->EHdr.e_machine            = EM_AMD64;
    1017 #   elif defined(RT_ARCH_ARM64)
    1018     pSymFile->EHdr.e_machine            = EM_AARCH64;
    1019 #   else
    1020 #    error "port me"
    1021 #   endif
    1022     pSymFile->EHdr.e_version            = 1; /*?*/
    1023     pSymFile->EHdr.e_entry              = 0;
    1024 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1025     pSymFile->EHdr.e_phoff              = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
    1026 #   else
    1027     pSymFile->EHdr.e_phoff              = 0;
    1028 #   endif
    1029     pSymFile->EHdr.e_shoff              = sizeof(pSymFile->EHdr);
    1030     pSymFile->EHdr.e_flags              = 0;
    1031     pSymFile->EHdr.e_ehsize             = sizeof(pSymFile->EHdr);
    1032 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1033     pSymFile->EHdr.e_phentsize          = sizeof(pSymFile->aPhdrs[0]);
    1034     pSymFile->EHdr.e_phnum              = RT_ELEMENTS(pSymFile->aPhdrs);
    1035 #   else
    1036     pSymFile->EHdr.e_phentsize          = 0;
    1037     pSymFile->EHdr.e_phnum              = 0;
    1038 #   endif
    1039     pSymFile->EHdr.e_shentsize          = sizeof(pSymFile->aShdrs[0]);
    1040     pSymFile->EHdr.e_shnum              = RT_ELEMENTS(pSymFile->aShdrs);
    1041     pSymFile->EHdr.e_shstrndx           = 0; /* set later */
    1042 
    1043     uint32_t offStrTab = 0;
    1044 #define APPEND_STR(a_szStr) do { \
    1045         memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
    1046         offStrTab += sizeof(a_szStr); \
    1047         Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
    1048     } while (0)
    1049 #define APPEND_STR_FMT(a_szStr, ...) do { \
    1050         offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
    1051         offStrTab++; \
    1052         Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
    1053     } while (0)
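A brief note on how these two helpers are used below (illustrative only): each section or symbol name is appended to the single string table, with its starting offset recorded just before the append:

    /* Illustrative: right after APPEND_STR("") the table holds one NUL and offStrTab == 1.
         pSymFile->aShdrs[i].sh_name = offStrTab;   records 1
         APPEND_STR(".eh_frame");                   copies 10 bytes (incl. the NUL), offStrTab becomes 11
       so sh_name indexes the NUL-terminated ".eh_frame" string inside szzStrTab. */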
    1054 
    1055     /*
    1056      * Section headers.
    1057      */
    1058     /* Section header #0: NULL */
    1059     unsigned i = 0;
    1060     APPEND_STR("");
    1061     RT_ZERO(pSymFile->aShdrs[i]);
    1062     i++;
    1063 
    1064     /* Section header: .eh_frame */
    1065     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1066     APPEND_STR(".eh_frame");
    1067     pSymFile->aShdrs[i].sh_type         = SHT_PROGBITS;
    1068     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC | SHF_EXECINSTR;
    1069 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    1070     pSymFile->aShdrs[i].sh_offset
    1071         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
    1072 #   else
    1073     pSymFile->aShdrs[i].sh_addr         = (uintptr_t)&pSymFile->abEhFrame[0];
    1074     pSymFile->aShdrs[i].sh_offset       = 0;
    1075 #   endif
    1076 
    1077     pSymFile->aShdrs[i].sh_size         = sizeof(pEhFrame->abEhFrame);
    1078     pSymFile->aShdrs[i].sh_link         = 0;
    1079     pSymFile->aShdrs[i].sh_info         = 0;
    1080     pSymFile->aShdrs[i].sh_addralign    = 1;
    1081     pSymFile->aShdrs[i].sh_entsize      = 0;
    1082     memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
    1083     i++;
    1084 
    1085     /* Section header: .shstrtab */
    1086     unsigned const iShStrTab = i;
    1087     pSymFile->EHdr.e_shstrndx           = iShStrTab;
    1088     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1089     APPEND_STR(".shstrtab");
    1090     pSymFile->aShdrs[i].sh_type         = SHT_STRTAB;
    1091     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1092 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    1093     pSymFile->aShdrs[i].sh_offset
    1094         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
    1095 #   else
    1096     pSymFile->aShdrs[i].sh_addr         = (uintptr_t)&pSymFile->szzStrTab[0];
    1097     pSymFile->aShdrs[i].sh_offset       = 0;
    1098 #   endif
    1099     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->szzStrTab);
    1100     pSymFile->aShdrs[i].sh_link         = 0;
    1101     pSymFile->aShdrs[i].sh_info         = 0;
    1102     pSymFile->aShdrs[i].sh_addralign    = 1;
    1103     pSymFile->aShdrs[i].sh_entsize      = 0;
    1104     i++;
    1105 
    1106     /* Section header: .symtab */
    1107     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1108     APPEND_STR(".symtab");
    1109     pSymFile->aShdrs[i].sh_type         = SHT_SYMTAB;
    1110     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1111     pSymFile->aShdrs[i].sh_offset
    1112         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
    1113     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aSymbols);
    1114     pSymFile->aShdrs[i].sh_link         = iShStrTab;
    1115     pSymFile->aShdrs[i].sh_info         = RT_ELEMENTS(pSymFile->aSymbols);
    1116     pSymFile->aShdrs[i].sh_addralign    = sizeof(pSymFile->aSymbols[0].st_value);
    1117     pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aSymbols[0]);
    1118     i++;
    1119 
    1120 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1121     /* Section header: .dynsym */
    1122     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1123     APPEND_STR(".dynsym");
    1124     pSymFile->aShdrs[i].sh_type         = SHT_DYNSYM;
    1125     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1126     pSymFile->aShdrs[i].sh_offset
    1127         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
    1128     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aDynSyms);
    1129     pSymFile->aShdrs[i].sh_link         = iShStrTab;
    1130     pSymFile->aShdrs[i].sh_info         = RT_ELEMENTS(pSymFile->aDynSyms);
    1131     pSymFile->aShdrs[i].sh_addralign    = sizeof(pSymFile->aDynSyms[0].st_value);
    1132     pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aDynSyms[0]);
    1133     i++;
    1134 #   endif
    1135 
    1136 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1137     /* Section header: .dynamic */
    1138     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1139     APPEND_STR(".dynamic");
    1140     pSymFile->aShdrs[i].sh_type         = SHT_DYNAMIC;
    1141     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1142     pSymFile->aShdrs[i].sh_offset
    1143         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
    1144     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aDyn);
    1145     pSymFile->aShdrs[i].sh_link         = iShStrTab;
    1146     pSymFile->aShdrs[i].sh_info         = 0;
    1147     pSymFile->aShdrs[i].sh_addralign    = 1;
    1148     pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aDyn[0]);
    1149     i++;
    1150 #   endif
    1151 
    1152     /* Section header: .text */
    1153     unsigned const iShText = i;
    1154     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1155     APPEND_STR(".text");
    1156     pSymFile->aShdrs[i].sh_type         = SHT_PROGBITS;
    1157     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC | SHF_EXECINSTR;
    1158 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    1159     pSymFile->aShdrs[i].sh_offset
    1160         = pSymFile->aShdrs[i].sh_addr   = sizeof(GDBJITSYMFILE);
    1161 #   else
    1162     pSymFile->aShdrs[i].sh_addr         = (uintptr_t)(pSymFile + 1);
    1163     pSymFile->aShdrs[i].sh_offset       = 0;
    1164 #   endif
    1165     pSymFile->aShdrs[i].sh_size         = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
    1166     pSymFile->aShdrs[i].sh_link         = 0;
    1167     pSymFile->aShdrs[i].sh_info         = 0;
    1168     pSymFile->aShdrs[i].sh_addralign    = 1;
    1169     pSymFile->aShdrs[i].sh_entsize      = 0;
    1170     i++;
    1171 
    1172     Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
    1173 
    1174 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1175     /*
    1176      * The program headers:
    1177      */
    1178     /* Everything in a single LOAD segment: */
    1179     i = 0;
    1180     pSymFile->aPhdrs[i].p_type          = PT_LOAD;
    1181     pSymFile->aPhdrs[i].p_flags         = PF_X | PF_R;
    1182     pSymFile->aPhdrs[i].p_offset
    1183         = pSymFile->aPhdrs[i].p_vaddr
    1184         = pSymFile->aPhdrs[i].p_paddr   = 0;
    1185     pSymFile->aPhdrs[i].p_filesz         /* Size of segment in file. */
    1186         = pSymFile->aPhdrs[i].p_memsz   = pExecMemAllocator->cbChunk - offSymFileInChunk;
    1187     pSymFile->aPhdrs[i].p_align         = HOST_PAGE_SIZE;
    1188     i++;
    1189     /* The .dynamic segment. */
    1190     pSymFile->aPhdrs[i].p_type          = PT_DYNAMIC;
    1191     pSymFile->aPhdrs[i].p_flags         = PF_R;
    1192     pSymFile->aPhdrs[i].p_offset
    1193         = pSymFile->aPhdrs[i].p_vaddr
    1194         = pSymFile->aPhdrs[i].p_paddr   = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
    1195     pSymFile->aPhdrs[i].p_filesz         /* Size of segment in file. */
    1196         = pSymFile->aPhdrs[i].p_memsz   = sizeof(pSymFile->aDyn);
    1197     pSymFile->aPhdrs[i].p_align         = sizeof(pSymFile->aDyn[0].d_tag);
    1198     i++;
    1199 
    1200     Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
    1201 
    1202     /*
    1203      * The dynamic section:
    1204      */
    1205     i = 0;
    1206     pSymFile->aDyn[i].d_tag             = DT_SONAME;
    1207     pSymFile->aDyn[i].d_un.d_val        = offStrTab;
    1208     APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
    1209     i++;
    1210     pSymFile->aDyn[i].d_tag             = DT_STRTAB;
    1211     pSymFile->aDyn[i].d_un.d_ptr        = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
    1212     i++;
    1213     pSymFile->aDyn[i].d_tag             = DT_STRSZ;
    1214     pSymFile->aDyn[i].d_un.d_val        = sizeof(pSymFile->szzStrTab);
    1215     i++;
    1216     pSymFile->aDyn[i].d_tag             = DT_SYMTAB;
    1217     pSymFile->aDyn[i].d_un.d_ptr        = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
    1218     i++;
    1219     pSymFile->aDyn[i].d_tag             = DT_SYMENT;
    1220     pSymFile->aDyn[i].d_un.d_val        = sizeof(pSymFile->aDynSyms[0]);
    1221     i++;
    1222     pSymFile->aDyn[i].d_tag             = DT_NULL;
    1223     i++;
    1224     Assert(i == RT_ELEMENTS(pSymFile->aDyn));
    1225 #   endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
    1226 
    1227     /*
    1228      * Symbol tables:
    1229      */
    1230     /** @todo gdb doesn't seem to really like this ...   */
    1231     i = 0;
    1232     pSymFile->aSymbols[i].st_name       = 0;
    1233     pSymFile->aSymbols[i].st_shndx      = SHN_UNDEF;
    1234     pSymFile->aSymbols[i].st_value      = 0;
    1235     pSymFile->aSymbols[i].st_size       = 0;
    1236     pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
    1237     pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
    1238 #   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    1239     pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
    1240 #   endif
    1241     i++;
    1242 
    1243     pSymFile->aSymbols[i].st_name       = 0;
    1244     pSymFile->aSymbols[i].st_shndx      = SHN_ABS;
    1245     pSymFile->aSymbols[i].st_value      = 0;
    1246     pSymFile->aSymbols[i].st_size       = 0;
    1247     pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
    1248     pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
    1249     i++;
    1250 
    1251     pSymFile->aSymbols[i].st_name       = offStrTab;
    1252     APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
    1253 #   if 0
    1254     pSymFile->aSymbols[i].st_shndx      = iShText;
    1255     pSymFile->aSymbols[i].st_value      = 0;
    1256 #   else
    1257     pSymFile->aSymbols[i].st_shndx      = SHN_ABS;
    1258     pSymFile->aSymbols[i].st_value      = (uintptr_t)(pSymFile + 1);
    1259 #   endif
    1260     pSymFile->aSymbols[i].st_size       = pSymFile->aShdrs[iShText].sh_size;
    1261     pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
    1262     pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
    1263 #   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    1264     pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
    1265     pSymFile->aDynSyms[1].st_value      = (uintptr_t)(pSymFile + 1);
    1266 #   endif
    1267     i++;
    1268 
    1269     Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
    1270     Assert(offStrTab < sizeof(pSymFile->szzStrTab));
    1271 
    1272     /*
    1273      * The GDB JIT entry and informing GDB.
    1274      */
    1275     pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
    1276 #   if 1
    1277     pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
    1278 #   else
    1279     pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
    1280 #   endif
    1281 
    1282     RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
    1283     RTCritSectEnter(&g_IemNativeGdbJitLock);
    1284     pEhFrame->GdbJitEntry.pNext      = NULL;
    1285     pEhFrame->GdbJitEntry.pPrev      = __jit_debug_descriptor.pTail;
    1286     if (__jit_debug_descriptor.pTail)
    1287         __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
    1288     else
    1289         __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
    1290     __jit_debug_descriptor.pTail     = &pEhFrame->GdbJitEntry;
    1291     __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
    1292 
    1293     /* Notify GDB: */
    1294     __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
    1295     __jit_debug_register_code();
    1296     __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
    1297     RTCritSectLeave(&g_IemNativeGdbJitLock);
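
    /* For reference, the list manipulation and notification above follow GDB's
       documented JIT compilation interface.  A minimal sketch of the canonical
       declarations the debugger expects (GDB's own names, not the VBox wrappers
       used above) looks roughly like this:

           struct jit_code_entry
           {
               struct jit_code_entry *next_entry;
               struct jit_code_entry *prev_entry;
               const char            *symfile_addr;    // in-memory ELF image
               uint64_t               symfile_size;
           };

           struct jit_descriptor
           {
               uint32_t               version;         // always 1
               uint32_t               action_flag;     // register / unregister
               struct jit_code_entry *relevant_entry;
               struct jit_code_entry *first_entry;
           };

           void __attribute__((noinline)) __jit_debug_register_code(void) { }
           struct jit_descriptor __jit_debug_descriptor = { 1, 0, NULL, NULL };

       The debugger plants a breakpoint on __jit_debug_register_code() and reads
       the descriptor and the new entry's symbol file when it fires. */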
    1298 
    1299 #  else  /* !IEMNATIVE_USE_GDB_JIT */
    1300     RT_NOREF(pVCpu);
    1301 #  endif /* !IEMNATIVE_USE_GDB_JIT */
    1302 
    1303     return VINF_SUCCESS;
    1304 }
    1305 
    1306 # endif /* !RT_OS_WINDOWS */
    1307 #endif /* IN_RING3 */
    1308 
    1309 
    1310 /**
    1311  * Adds another chunk to the executable memory allocator.
    1312  *
    1313  * This is used by the init code for the initial allocation and later by the
    1314  * regular allocator function when it's out of memory.
    1315  */
    1316 static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
    1317 {
    1318     /* Check that we've room for growth. */
    1319     uint32_t const idxChunk = pExecMemAllocator->cChunks;
    1320     AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
    1321 
    1322     /* Allocate a chunk. */
    1323 #ifdef RT_OS_DARWIN
    1324     void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
    1325 #else
    1326     void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
    1327 #endif
    1328     AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
    1329 
    1330 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1331     int rc = VINF_SUCCESS;
    1332 #else
    1333     /* Initialize the heap for the chunk. */
    1334     RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
    1335     int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
    1336     AssertRC(rc);
    1337     if (RT_SUCCESS(rc))
    1338     {
    1339         /*
    1340          * We want the memory to be 64 byte aligned, so the first time thru
    1341          * here we do some exploratory allocations to see how we can achieve this.
    1342          * On subsequent runs we only make an initial adjustment allocation, if
    1343          * necessary.
    1344          *
    1345          * Since we own the heap implementation, we know that the internal block
    1346          * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
    1347          * so all we need to do wrt allocation size adjustments is to add 32 bytes
    1348          * to the size, align up by 64 bytes, and subtract 32 bytes.
    1349          *
    1350          * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
    1351          * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
    1352          * allocation to force subsequent allocations to return 64 byte aligned
    1353          * user areas.
    1354          */
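             /* Worked example (sizes per the 64-bit layout described above): for a
                256 byte request the adjusted size is RT_ALIGN_32(256 + 32, 64) - 32 = 288,
                so the heap consumes 32 (header) + 288 (user) = 320 bytes - a multiple of
                64 - keeping the user area of the block that follows 64 byte aligned. */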
    1355         if (!pExecMemAllocator->cbHeapBlockHdr)
    1356         {
    1357             pExecMemAllocator->cbHeapBlockHdr   = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
    1358             pExecMemAllocator->cbHeapAlignTweak = 64;
    1359             pExecMemAllocator->pvAlignTweak     = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
    1360                                                                     32 /*cbAlignment*/);
    1361             AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
    1362 
    1363             void *pvTest1 = RTHeapSimpleAlloc(hHeap,
    1364                                                 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
    1365                                               - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
    1366             AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
    1367             AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
    1368 
    1369             void *pvTest2 = RTHeapSimpleAlloc(hHeap,
    1370                                                 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
    1371                                               - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
    1372             AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
    1373             AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
    1374 
    1375             RTHeapSimpleFree(hHeap, pvTest2);
    1376             RTHeapSimpleFree(hHeap, pvTest1);
    1377         }
    1378         else
    1379         {
    1380             pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap,  pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
    1381             AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
    1382         }
    1383         if (RT_SUCCESS(rc))
    1384 #endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
    1385         {
    1386             /*
    1387              * Add the chunk.
    1388              *
    1389              * This must be done before the unwind init so Windows can allocate
    1390              * memory from the chunk when using the alternative sub-allocator.
    1391              */
    1392             pExecMemAllocator->aChunks[idxChunk].pvChunk      = pvChunk;
    1393 #ifdef IN_RING3
    1394             pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
    1395 #endif
    1396 #ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1397             pExecMemAllocator->aChunks[idxChunk].hHeap        = hHeap;
    1398 #else
    1399             pExecMemAllocator->aChunks[idxChunk].cFreeUnits   = pExecMemAllocator->cUnitsPerChunk;
    1400             pExecMemAllocator->aChunks[idxChunk].idxFreeHint  = 0;
    1401             memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
    1402                    0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
    1403 #endif
    1404 
    1405             pExecMemAllocator->cChunks      = idxChunk + 1;
    1406             pExecMemAllocator->idxChunkHint = idxChunk;
    1407 
    1408 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1409             pExecMemAllocator->cbTotal     += pExecMemAllocator->cbChunk;
    1410             pExecMemAllocator->cbFree      += pExecMemAllocator->cbChunk;
    1411 #else
    1412             size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
    1413             pExecMemAllocator->cbTotal     += cbFree;
    1414             pExecMemAllocator->cbFree      += cbFree;
    1415 #endif
    1416 
    1417 #ifdef IN_RING3
    1418             /*
    1419              * Initialize the unwind information (this cannot really fail atm).
    1420              * (This sets pvUnwindInfo.)
    1421              */
    1422             rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
    1423             if (RT_SUCCESS(rc))
    1424 #endif
    1425             {
    1426                 return VINF_SUCCESS;
    1427             }
    1428 
    1429 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1430             /* Just in case the impossible happens, undo the setup above: */
    1431             pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
    1432             pExecMemAllocator->cbFree  -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    1433             pExecMemAllocator->cChunks  = idxChunk;
    1434             memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
    1435                    0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
    1436             pExecMemAllocator->aChunks[idxChunk].pvChunk    = NULL;
    1437             pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
    1438 #endif
    1439         }
    1440 #ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1441     }
    1442 #endif
    1443     RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
    1444     RT_NOREF(pVCpu);
    1445     return rc;
    1446 }
    1447 
    1448 
    1449 /**
    1450  * Initializes the executable memory allocator for native recompilation on the
    1451  * calling EMT.
    1452  *
    1453  * @returns VBox status code.
    1454  * @param   pVCpu       The cross context virtual CPU structure of the calling
    1455  *                      thread.
    1456  * @param   cbMax       The max size of the allocator.
    1457  * @param   cbInitial   The initial allocator size.
    1458  * @param   cbChunk     The chunk size, 0 or UINT32_MAX for default (@a cbMax
    1459  *                      dependent).
    1460  */
    1461 int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
    1462 {
    1463     /*
    1464      * Validate input.
    1465      */
    1466     AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
    1467     AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
    1468     AssertLogRelMsgReturn(   cbChunk == UINT32_MAX
    1469                           || cbChunk == 0
    1470                           || (   RT_IS_POWER_OF_TWO(cbChunk)
    1471                               && cbChunk >= _1M
    1472                               && cbChunk <= _256M
    1473                               && cbChunk <= cbMax),
    1474                           ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
    1475                           VERR_OUT_OF_RANGE);
    1476 
    1477     /*
    1478      * Adjust/figure out the chunk size.
    1479      */
    1480     if (cbChunk == 0 || cbChunk == UINT32_MAX)
    1481     {
    1482         if (cbMax >= _256M)
    1483             cbChunk = _64M;
    1484         else
    1485         {
    1486             if (cbMax < _16M)
    1487                 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
    1488             else
    1489                 cbChunk = (uint32_t)cbMax / 4;
    1490             if (!RT_IS_POWER_OF_TWO(cbChunk))
    1491                 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
    1492         }
    1493     }
    1494 
    1495     if (cbChunk > cbMax)
    1496         cbMax = cbChunk;
    1497     else
    1498         cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
    1499     uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
    1500     AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
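    /* Worked example of the defaulting above (hypothetical inputs): cbMax = 40 MiB
       gives cbChunk = 40 MiB / 4 = 10 MiB, which is not a power of two and is
       rounded up to 16 MiB; cbMax is then rounded up to 48 MiB and cMaxChunks = 3. */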
    1501 
    1502     /*
    1503      * Allocate and initialize the allocator instance.
    1504      */
    1505     size_t       cbNeeded   = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
    1506 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1507     size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
    1508     size_t const cbBitmap   = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
    1509     cbNeeded += cbBitmap * cMaxChunks;
    1510     AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
    1511     Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
    1512 #endif
    1513 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    1514     size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
    1515     cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
    1516 #endif
    1517     PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
    1518     AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
    1519                           VERR_NO_MEMORY);
    1520     pExecMemAllocator->uMagic       = IEMEXECMEMALLOCATOR_MAGIC;
    1521     pExecMemAllocator->cbChunk      = cbChunk;
    1522     pExecMemAllocator->cMaxChunks   = cMaxChunks;
    1523     pExecMemAllocator->cChunks      = 0;
    1524     pExecMemAllocator->idxChunkHint = 0;
    1525     pExecMemAllocator->cAllocations = 0;
    1526     pExecMemAllocator->cbTotal      = 0;
    1527     pExecMemAllocator->cbFree       = 0;
    1528     pExecMemAllocator->cbAllocated  = 0;
    1529 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1530     pExecMemAllocator->pbmAlloc                 = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
    1531     pExecMemAllocator->cUnitsPerChunk           = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    1532     pExecMemAllocator->cBitmapElementsPerChunk  = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
    1533     memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
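    /* Sizing sketch (the real unit shift lives in IEMN8veRecompiler.h; the value used
       here is only illustrative): with cbChunk = 16 MiB and 256 byte allocation units
       (shift 8) this gives 65536 units per chunk tracked by 1024 uint64_t bitmap words,
       a set bit meaning "allocated" - hence the 0xff prefill above, which
       iemExecMemAllocatorGrow clears again for each chunk it actually adds. */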
    1534 #endif
    1535 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    1536     pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
    1537 #endif
    1538     for (uint32_t i = 0; i < cMaxChunks; i++)
    1539     {
    1540 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1541         pExecMemAllocator->aChunks[i].cFreeUnits   = 0;
    1542         pExecMemAllocator->aChunks[i].idxFreeHint  = 0;
    1543 #else
    1544         pExecMemAllocator->aChunks[i].hHeap        = NIL_RTHEAPSIMPLE;
    1545 #endif
    1546         pExecMemAllocator->aChunks[i].pvChunk      = NULL;
    1547 #ifdef IN_RING0
    1548         pExecMemAllocator->aChunks[i].hMemObj      = NIL_RTR0MEMOBJ;
    1549 #else
    1550         pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
    1551 #endif
    1552     }
    1553     pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
    1554 
    1555     /*
    1556      * Do the initial allocations.
    1557      */
    1558     while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
    1559     {
    1560         int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
    1561         AssertLogRelRCReturn(rc, rc);
    1562     }
    1563 
    1564     pExecMemAllocator->idxChunkHint = 0;
    1565 
    1566     return VINF_SUCCESS;
    1567 }
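
/* Usage sketch (hypothetical values, not lifted from the actual caller): an EMT init
   path would invoke this along the lines of

       int rc = iemExecMemAllocatorInit(pVCpu, _64M /*cbMax*/, _16M /*cbInitial*/, 0 /*cbChunk: default*/);
       AssertLogRelRCReturn(rc, rc);

   With the defaulting above this yields cbChunk = 16 MiB and cMaxChunks = 4, and the
   initial allocation loop is expected to add a single 16 MiB chunk up front. */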
    1568 
    1569 
    1570 /*********************************************************************************************************************************
    1571 *   Native Recompilation                                                                                                         *
    1572 *********************************************************************************************************************************/
    1573 
    1574 
    1575 /**
    1576  * Used by TB code when encountering a non-zero status or rcPassUp after a call.
    1577  */
    1578 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
    1579 {
    1580     pVCpu->iem.s.cInstructions += idxInstr;
    1581     return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
    1582 }
    1583 
    1584 
    1585 /**
    1586  * Used by TB code when it wants to raise a \#GP(0).
    1587  */
    1588 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
    1589 {
    1590     iemRaiseGeneralProtectionFault0Jmp(pVCpu);
    1591 #ifndef _MSC_VER
    1592     return VINF_IEM_RAISED_XCPT; /* not reached */
    1593 #endif
    1594 }
    1595 
    1596 
    1597 /**
    1598  * Used by TB code when detecting opcode changes.
    1599  * @see iemThreadeFuncWorkerObsoleteTb
    1600  */
    1601 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
    1602 {
    1603     /* We set fSafeToFree to false because we're being called in the context
    1604        of a TB callback function, which for native TBs means we cannot release
    1605        the executable memory until we've returned our way back to iemTbExec, as
    1606        that return path goes via the native code generated for the TB. */
    1607     iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
    1608     return VINF_IEM_REEXEC_BREAK;
    1609 }
    1610 
    1611 
    1612 /**
    1613  * Used by TB code when we need to switch to a TB with CS.LIM checking.
    1614  */
    1615 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
    1616 {
    1617     Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
    1618           pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
    1619           (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
    1620           pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
    1621     STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
    1622     return VINF_IEM_REEXEC_BREAK;
    1623 }
    1624 
    1625 
    1626 /*********************************************************************************************************************************
    1627 *   Helpers: Segmented memory fetches and stores.                                                                                *
    1628 *********************************************************************************************************************************/
    1629 
    1630 /**
    1631  * Used by TB code to load unsigned 8-bit data w/ segmentation.
    1632  */
    1633 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1634 {
    1635     return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
    1636 }
    1637 
    1638 
    1639 /**
    1640  * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
    1641  * to 16 bits.
    1642  */
    1643 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1644 {
    1645     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
    1646 }
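
/* The cast chain above does the extension in two steps: a fetched byte of 0x80 becomes
   (int8_t)-128, sign extends to (int16_t)0xFF80, and is then zero extended via uint16_t
   so the helper returns 0x000000000000FF80.  The wider _Sx_ variants below follow the
   same pattern. */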
    1647 
    1648 
    1649 /**
    1650  * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
    1651  * to 32 bits.
    1652  */
    1653 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1654 {
    1655     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
    1656 }
    1657 
    1658 /**
    1659  * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
    1660  * to 64 bits.
    1661  */
    1662 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1663 {
    1664     return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU8SafeJmp */
    1665 }
    1666 
    1667 
    1668 /**
    1669  * Used by TB code to load unsigned 16-bit data w/ segmentation.
    1670  */
    1671 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1672 {
    1673     return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
    1674 }
    1675 
    1676 
    1677 /**
    1678  * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
    1679  * to 32 bits.
    1680  */
    1681 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1682 {
    1683     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
    1684 }
    1685 
    1686 
    1687 /**
    1688  * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
    1689  * to 64 bits.
    1690  */
    1691 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1692 {
    1693     return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU16SafeJmp */
    1694 }
    1695 
    1696 
    1697 /**
    1698  * Used by TB code to load unsigned 32-bit data w/ segmentation.
    1699  */
    1700 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1701 {
    1702     return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
    1703 }
    1704 
    1705 
    1706 /**
    1707  * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
    1708  * to 64 bits.
    1709  */
    1710 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1711 {
    1712     return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU32SafeJmp */
    1713 }
    1714 
    1715 
    1716 /**
    1717  * Used by TB code to load unsigned 64-bit data w/ segmentation.
    1718  */
    1719 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1720 {
    1721     return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem); /** @todo use iemMemFetchDataU64SafeJmp */
    1722 }
    1723 
    1724 
    1725 /**
    1726  * Used by TB code to store unsigned 8-bit data w/ segmentation.
    1727  */
    1728 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
    1729 {
    1730     iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
    1731 }
    1732 
    1733 
    1734 /**
    1735  * Used by TB code to store unsigned 16-bit data w/ segmentation.
    1736  */
    1737 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
    1738 {
    1739     iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
    1740 }
    1741 
    1742 
    1743 /**
    1744  * Used by TB code to store unsigned 32-bit data w/ segmentation.
    1745  */
    1746 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
    1747 {
    1748     iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
    1749 }
    1750 
    1751 
    1752 /**
    1753  * Used by TB code to store unsigned 64-bit data w/ segmentation.
    1754  */
    1755 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
    1756 {
    1757     iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
    1758 }
    1759 
    1760 
    1761 
    1762 /**
    1763  * Used by TB code to push unsigned 16-bit value onto a generic stack.
    1764  */
    1765 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
    1766 {
    1767     iemMemStackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemStackPushU16SafeJmp */
    1768 }
    1769 
    1770 
    1771 /**
    1772  * Used by TB code to push unsigned 32-bit value onto a generic stack.
    1773  */
    1774 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU32,(PVMCPUCC pVCpu, uint32_t u32Value))
    1775 {
    1776     iemMemStackPushU32Jmp(pVCpu, u32Value); /** @todo iemMemStackPushU32SafeJmp */
    1777 }
    1778 
    1779 
    1780 /**
    1781  * Used by TB code to push 32-bit selector value onto a generic stack.
    1782  *
    1783  * Intel CPUs don't write a whole dword, hence the special function.
    1784  */
    1785 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU32SReg,(PVMCPUCC pVCpu, uint32_t u32Value))
    1786 {
    1787     iemMemStackPushU32SRegJmp(pVCpu, u32Value); /** @todo iemMemStackPushU32SRegSafeJmp */
    1788 }
    1789 
    1790 
    1791 /**
    1792  * Used by TB code to push unsigned 64-bit value onto a generic stack.
    1793  */
    1794 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPushU64,(PVMCPUCC pVCpu, uint64_t u64Value))
    1795 {
    1796     iemMemStackPushU64Jmp(pVCpu, u64Value); /** @todo iemMemStackPushU64SafeJmp */
    1797 }
    1798 
    1799 
    1800 /**
    1801  * Used by TB code to pop a 16-bit general purpose register off a generic stack.
    1802  */
    1803 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
    1804 {
    1805     iemMemStackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU16SafeJmp */
    1806 }
    1807 
    1808 
    1809 /**
    1810  * Used by TB code to pop a 32-bit general purpose register off a generic stack.
    1811  */
    1812 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU32,(PVMCPUCC pVCpu, uint8_t iGReg))
    1813 {
    1814     iemMemStackPopGRegU32Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU32SafeJmp */
    1815 }
    1816 
    1817 
    1818 /**
    1819  * Used by TB code to pop a 64-bit general purpose register off a generic stack.
    1820  */
    1821 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackPopGRegU64,(PVMCPUCC pVCpu, uint8_t iGReg))
    1822 {
    1823     iemMemStackPopGRegU64Jmp(pVCpu, iGReg); /** @todo iemMemStackPopGRegU64SafeJmp */
    1824 }
    1825 
    1826 
    1827 
    1828 /*********************************************************************************************************************************
    1829 *   Helpers: Flat memory fetches and stores.                                                                                     *
    1830 *********************************************************************************************************************************/
    1831 
    1832 /**
    1833  * Used by TB code to load unsigned 8-bit data w/ flat address.
    1834  * @note Zero extending the value to 64-bit to simplify assembly.
    1835  */
    1836 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1837 {
    1838     return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
    1839 }
    1840 
    1841 
    1842 /**
    1843  * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
    1844  * to 16 bits.
    1845  * @note Zero extending the value to 64-bit to simplify assembly.
    1846  */
    1847 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1848 {
    1849     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
    1850 }
    1851 
    1852 
    1853 /**
    1854  * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
    1855  * to 32 bits.
    1856  * @note Zero extending the value to 64-bit to simplify assembly.
    1857  */
    1858 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1859 {
    1860     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
    1861 }
    1862 
    1863 
    1864 /**
    1865  * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
    1866  * to 64 bits.
    1867  */
    1868 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1869 {
    1870     return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU8SafeJmp */
    1871 }
    1872 
    1873 
    1874 /**
    1875  * Used by TB code to load unsigned 16-bit data w/ flat address.
    1876  * @note Zero extending the value to 64-bit to simplify assembly.
    1877  */
    1878 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1879 {
    1880     return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
    1881 }
    1882 
    1883 
    1884 /**
    1885  * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
    1886  * to 32 bits.
    1887  * @note Zero extending the value to 64-bit to simplify assembly.
    1888  */
    1889 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1890 {
    1891     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
    1892 }
    1893 
    1894 
    1895 /**
    1896  * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
    1897  * to 64 bits.
    1898  * @note Zero extending the value to 64-bit to simplify assembly.
    1899  */
    1900 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1901 {
    1902     return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU16SafeJmp */
    1903 }
    1904 
    1905 
    1906 /**
    1907  * Used by TB code to load unsigned 32-bit data w/ flat address.
    1908  * @note Zero extending the value to 64-bit to simplify assembly.
    1909  */
    1910 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1911 {
    1912     return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
    1913 }
    1914 
    1915 
    1916 /**
    1917  * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
    1918  * to 64 bits.
    1919  * @note Zero extending the value to 64-bit to simplify assembly.
    1920  */
    1921 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1922 {
    1923     return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU32SafeJmp */
    1924 }
    1925 
    1926 
    1927 /**
    1928  * Used by TB code to load unsigned 64-bit data w/ flat address.
    1929  */
    1930 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1931 {
    1932     return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem); /** @todo use iemMemFlatFetchDataU64SafeJmp */
    1933 }
    1934 
    1935 
    1936 /**
    1937  * Used by TB code to store unsigned 8-bit data w/ flat address.
    1938  */
    1939 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
    1940 {
    1941     iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value); /** @todo use iemMemStoreDataU8SafeJmp */
    1942 }
    1943 
    1944 
    1945 /**
    1946  * Used by TB code to store unsigned 16-bit data w/ flat address.
    1947  */
    1948 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
    1949 {
    1950     iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value); /** @todo use iemMemStoreDataU16SafeJmp */
    1951 }
    1952 
    1953 
    1954 /**
    1955  * Used by TB code to store unsigned 32-bit data w/ flat address.
    1956  */
    1957 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    1958 {
    1959     iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value); /** @todo use iemMemStoreDataU32SafeJmp */
    1960 }
    1961 
    1962 
    1963 /**
    1964  * Used by TB code to store unsigned 64-bit data w/ flat address.
    1965  */
    1966 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
    1967 {
    1968     iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value); /** @todo use iemMemStoreDataU64SafeJmp */
    1969 }
    1970 
    1971 
    1972 
    1973 /**
    1974  * Used by TB code to push unsigned 16-bit value onto a flat 32-bit stack.
    1975  */
    1976 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
    1977 {
    1978     iemMemFlat32StackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemFlat32StackPushU16SafeJmp */
    1979 }
    1980 
    1981 
    1982 /**
    1983  * Used by TB code to push unsigned 32-bit value onto a flat 32-bit stack.
    1984  */
    1985 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU32,(PVMCPUCC pVCpu, uint32_t u32Value))
    1986 {
    1987     iemMemFlat32StackPushU32Jmp(pVCpu, u32Value); /** @todo iemMemFlat32StackPushU32SafeJmp */
    1988 }
    1989 
    1990 
    1991 /**
    1992  * Used by TB code to push segment selector value onto a flat 32-bit stack.
    1993  *
    1994  * Intel CPUs don't write a whole dword, hence the special function.
    1995  */
    1996 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PushU32SReg,(PVMCPUCC pVCpu, uint32_t u32Value))
    1997 {
    1998     iemMemFlat32StackPushU32SRegJmp(pVCpu, u32Value); /** @todo iemMemFlat32StackPushU32SRegSafeJmp */
    1999 }
    2000 
    2001 
    2002 /**
    2003  * Used by TB code to pop a 16-bit general purpose register off a flat 32-bit stack.
    2004  */
    2005 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
    2006 {
    2007     iemMemFlat32StackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemFlat32StackPopGRegU16SafeJmp */
    2008 }
    2009 
    2010 
    2011 /**
    2012  * Used by TB code to pop a 32-bit general purpose register off a flat 32-bit stack.
    2013  */
    2014 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat32PopGRegU32,(PVMCPUCC pVCpu, uint8_t iGReg))
    2015 {
    2016     iemMemFlat32StackPopGRegU32Jmp(pVCpu, iGReg); /** @todo iemMemFlat32StackPopGRegU32SafeJmp */
    2017 }
    2018 
    2019 
    2020 
    2021 /**
    2022  * Used by TB code to push unsigned 16-bit value onto a flat 64-bit stack.
    2023  */
    2024 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PushU16,(PVMCPUCC pVCpu, uint16_t u16Value))
    2025 {
    2026     iemMemFlat64StackPushU16Jmp(pVCpu, u16Value); /** @todo iemMemFlat64StackPushU16SafeJmp */
    2027 }
    2028 
    2029 
    2030 /**
    2031  * Used by TB code to push unsigned 64-bit value onto a flat 64-bit stack.
    2032  */
    2033 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PushU64,(PVMCPUCC pVCpu, uint64_t u64Value))
    2034 {
    2035     iemMemFlat64StackPushU64Jmp(pVCpu, u64Value); /** @todo iemMemFlat64StackPushU64SafeJmp */
    2036 }
    2037 
    2038 
    2039 /**
    2040  * Used by TB code to pop a 16-bit general purpose register off a flat 64-bit stack.
    2041  */
    2042 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PopGRegU16,(PVMCPUCC pVCpu, uint8_t iGReg))
    2043 {
    2044     iemMemFlat64StackPopGRegU16Jmp(pVCpu, iGReg); /** @todo iemMemFlat64StackPopGRegU16SafeJmp */
    2045 }
    2046 
    2047 
    2048 /**
    2049  * Used by TB code to pop a 64-bit general purpose register off a flat 64-bit stack.
    2050  */
    2051 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlat64PopGRegU64,(PVMCPUCC pVCpu, uint8_t iGReg))
    2052 {
    2053     iemMemFlat64StackPopGRegU64Jmp(pVCpu, iGReg); /** @todo iemMemFlat64StackPopGRegU64SafeJmp */
    2054 }
    2055 
    2056 
    2057 
    2058 /*********************************************************************************************************************************
    2059 *   Helpers: Segmented memory mapping.                                                                                           *
    2060 *********************************************************************************************************************************/
    2061 
    2062 /**
    2063  * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
    2064  */
    2065 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2066                                                                RTGCPTR GCPtrMem, uint8_t iSegReg))
    2067 {
    2068     return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RwSafeJmp */
    2069 }
    2070 
    2071 
    2072 /**
    2073  * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
    2074  */
    2075 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2076                                                                RTGCPTR GCPtrMem, uint8_t iSegReg))
    2077 {
    2078     return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8WoSafeJmp */
    2079 }
    2080 
    2081 
    2082 /**
    2083  * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
    2084  */
    2085 IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2086                                                                      RTGCPTR GCPtrMem, uint8_t iSegReg))
    2087 {
    2088     return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU8RoSafeJmp */
    2089 }
    2090 
    2091 
    2092 /**
    2093  * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
    2094  */
    2095 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2096                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2097 {
    2098     return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RwSafeJmp */
    2099 }
    2100 
    2101 
    2102 /**
    2103  * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
    2104  */
    2105 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2106                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2107 {
    2108     return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16WoSafeJmp */
    2109 }
    2110 
    2111 
    2112 /**
    2113  * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
    2114  */
    2115 IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2116                                                                        RTGCPTR GCPtrMem, uint8_t iSegReg))
    2117 {
    2118     return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU16RoSafeJmp */
    2119 }
    2120 
    2121 
    2122 /**
    2123  * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
    2124  */
    2125 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2126                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2127 {
    2128     return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RwSafeJmp */
    2129 }
    2130 
    2131 
    2132 /**
    2133  * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
    2134  */
    2135 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2136                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2137 {
    2138     return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32WoSafeJmp */
    2139 }
    2140 
    2141 
    2142 /**
    2143  * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
    2144  */
    2145 IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2146                                                                        RTGCPTR GCPtrMem, uint8_t iSegReg))
    2147 {
    2148     return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU32RoSafeJmp */
    2149 }
    2150 
    2151 
    2152 /**
    2153  * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
    2154  */
    2155 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2156                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2157 {
    2158     return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RwSafeJmp */
    2159 }
    2160 
    2161 
    2162 /**
    2163  * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
    2164  */
    2165 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2166                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2167 {
    2168     return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64WoSafeJmp */
    2169 }
    2170 
    2171 
    2172 /**
    2173  * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
    2174  */
    2175 IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2176                                                                        RTGCPTR GCPtrMem, uint8_t iSegReg))
    2177 {
    2178     return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU64RoSafeJmp */
    2179 }
    2180 
    2181 
    2182 /**
    2183  * Used by TB code to map 80-bit float data writeonly w/ segmentation.
    2184  */
    2185 IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2186                                                                    RTGCPTR GCPtrMem, uint8_t iSegReg))
    2187 {
    2188     return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataR80WoSafeJmp */
    2189 }
    2190 
    2191 
    2192 /**
    2193  * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
    2194  */
    2195 IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2196                                                                   RTGCPTR GCPtrMem, uint8_t iSegReg))
    2197 {
    2198     return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataD80WoSafeJmp */
    2199 }
    2200 
    2201 
    2202 /**
    2203  * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
    2204  */
    2205 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2206                                                                     RTGCPTR GCPtrMem, uint8_t iSegReg))
    2207 {
    2208     return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RwSafeJmp */
    2209 }
    2210 
    2211 
    2212 /**
    2213  * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
    2214  */
    2215 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2216                                                                     RTGCPTR GCPtrMem, uint8_t iSegReg))
    2217 {
    2218     return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128WoSafeJmp */
    2219 }
    2220 
    2221 
    2222 /**
    2223  * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
    2224  */
    2225 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2226                                                                           RTGCPTR GCPtrMem, uint8_t iSegReg))
    2227 {
    2228     return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem); /** @todo use iemMemMapDataU128RoSafeJmp */
    2229 }
    2230 
    2231 
    2232 /*********************************************************************************************************************************
    2233 *   Helpers: Flat memory mapping.                                                                                                *
    2234 *********************************************************************************************************************************/
    2235 
    2236 /**
    2237  * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
    2238  */
    2239 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2240 {
    2241     return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RwSafeJmp */
    2242 }
    2243 
    2244 
    2245 /**
    2246  * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
    2247  */
    2248 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2249 {
    2250     return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8WoSafeJmp */
    2251 }
    2252 
    2253 
    2254 /**
    2255  * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
    2256  */
    2257 IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2258 {
    2259     return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU8RoSafeJmp */
    2260 }
    2261 
    2262 
    2263 /**
    2264  * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
    2265  */
    2266 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2267 {
    2268     return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RwSafeJmp */
    2269 }
    2270 
    2271 
    2272 /**
    2273  * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
    2274  */
    2275 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2276 {
    2277     return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16WoSafeJmp */
    2278 }
    2279 
    2280 
    2281 /**
    2282  * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
    2283  */
    2284 IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2285 {
    2286     return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU16RoSafeJmp */
    2287 }
    2288 
    2289 
    2290 /**
    2291  * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
    2292  */
    2293 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2294 {
    2295     return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RwSafeJmp */
    2296 }
    2297 
    2298 
    2299 /**
    2300  * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
    2301  */
    2302 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2303 {
    2304     return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32WoSafeJmp */
    2305 }
    2306 
    2307 
    2308 /**
    2309  * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
    2310  */
    2311 IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2312 {
    2313     return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU32RoSafeJmp */
    2314 }
    2315 
    2316 
    2317 /**
    2318  * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
    2319  */
    2320 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2321 {
    2322     return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RwSafeJmp */
    2323 }
    2324 
    2325 
    2326 /**
    2327  * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
    2328  */
    2329 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2330 {
    2331     return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64WoSafeJmp */
    2332 }
    2333 
    2334 
    2335 /**
    2336  * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
    2337  */
    2338 IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2339 {
    2340     return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU64RoSafeJmp */
    2341 }
    2342 
    2343 
    2344 /**
    2345  * Used by TB code to map 80-bit float data writeonly w/ flat address.
    2346  */
    2347 IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2348 {
    2349     return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataR80WoSafeJmp */
    2350 }
    2351 
    2352 
    2353 /**
    2354  * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
    2355  */
    2356 IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2357 {
    2358     return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataD80WoSafeJmp */
    2359 }
    2360 
    2361 
    2362 /**
    2363  * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
    2364  */
    2365 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2366 {
    2367     return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RwSafeJmp */
    2368 }
    2369 
    2370 
    2371 /**
    2372  * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
    2373  */
    2374 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2375 {
    2376     return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128WoSafeJmp */
    2377 }
    2378 
    2379 
    2380 /**
    2381  * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
    2382  */
    2383 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2384 {
    2385     return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem); /** @todo use iemMemFlatMapDataU128RoSafeJmp */
    2386 }
    2387 
    2388 
    2389 /*********************************************************************************************************************************
    2390 *   Helpers: Commit, rollback & unmap                                                                                            *
    2391 *********************************************************************************************************************************/
    2392 
    2393 /**
    2394  * Used by TB code to commit and unmap a read-write memory mapping.
    2395  */
    2396 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    2397 {
    2398     return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
    2399 }
    2400 
    2401 
    2402 /**
    2403  * Used by TB code to commit and unmap a write-only memory mapping.
    2404  */
    2405 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    2406 {
    2407     return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
    2408 }
    2409 
    2410 
    2411 /**
    2412  * Used by TB code to commit and unmap a read-only memory mapping.
    2413  */
    2414 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    2415 {
    2416     return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
    2417 }
    2418 
    2419 
    2420 /**
    2421  * Reinitializes the native recompiler state.
    2422  *
    2423  * Called before starting a new recompile job.
    2424  */
    2425 static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
    2426 {
    2427     pReNative->cLabels                     = 0;
    2428     pReNative->bmLabelTypes                = 0;
    2429     pReNative->cFixups                     = 0;
    2430 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2431     pReNative->pDbgInfo->cEntries          = 0;
    2432 #endif
    2433     pReNative->pTbOrg                      = pTb;
    2434     pReNative->cCondDepth                  = 0;
    2435     pReNative->uCondSeqNo                  = 0;
    2436     pReNative->uCheckIrqSeqNo              = 0;
    2437     pReNative->uTlbSeqNo                   = 0;
    2438 
    2439     pReNative->Core.bmHstRegs              = IEMNATIVE_REG_FIXED_MASK
    2440 #if IEMNATIVE_HST_GREG_COUNT < 32
    2441                                            | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
    2442 #endif
    2443                                            ;
    2444     pReNative->Core.bmHstRegsWithGstShadow = 0;
    2445     pReNative->Core.bmGstRegShadows        = 0;
    2446     pReNative->Core.bmVars                 = 0;
    2447     pReNative->Core.bmStack                = 0;
    2448     AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
    2449     pReNative->Core.u64ArgVars             = UINT64_MAX;
    2450 
    2451     AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 8);
    2452     pReNative->aidxUniqueLabels[0]         = UINT32_MAX;
    2453     pReNative->aidxUniqueLabels[1]         = UINT32_MAX;
    2454     pReNative->aidxUniqueLabels[2]         = UINT32_MAX;
    2455     pReNative->aidxUniqueLabels[3]         = UINT32_MAX;
    2456     pReNative->aidxUniqueLabels[4]         = UINT32_MAX;
    2457     pReNative->aidxUniqueLabels[5]         = UINT32_MAX;
    2458     pReNative->aidxUniqueLabels[6]         = UINT32_MAX;
    2459     pReNative->aidxUniqueLabels[7]         = UINT32_MAX;
    2460 
    2461     /* Full host register reinit: */
    2462     for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
    2463     {
    2464         pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
    2465         pReNative->Core.aHstRegs[i].enmWhat        = kIemNativeWhat_Invalid;
    2466         pReNative->Core.aHstRegs[i].idxVar         = UINT8_MAX;
    2467     }
    2468 
    2469     uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
    2470                    & ~(  RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
    2471 #ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
    2472                        | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
    2473 #endif
    2474 #ifdef IEMNATIVE_REG_FIXED_TMP0
    2475                        | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
    2476 #endif
    2477                       );
    2478     for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
    2479     {
    2480         fRegs &= ~RT_BIT_32(idxReg);
    2481         pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
    2482     }
    2483 
    2484     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat     = kIemNativeWhat_pVCpuFixed;
    2485 #ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
    2486     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat   = kIemNativeWhat_pCtxFixed;
    2487 #endif
    2488 #ifdef IEMNATIVE_REG_FIXED_TMP0
    2489     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat       = kIemNativeWhat_FixedTmp;
    2490 #endif
    2491     return pReNative;
    2492 }
    2493 
    2494 
    2495 /**
    2496  * Allocates and initializes the native recompiler state.
    2497  *
    2498  * This is called the first time an EMT wants to recompile something.
    2499  *
    2500  * @returns Pointer to the new recompiler state.
    2501  * @param   pVCpu   The cross context virtual CPU structure of the calling
    2502  *                  thread.
    2503  * @param   pTb     The TB that's about to be recompiled.
    2504  * @thread  EMT(pVCpu)
    2505  */
    2506 static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
    2507 {
    2508     VMCPU_ASSERT_EMT(pVCpu);
    2509 
    2510     PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
    2511     AssertReturn(pReNative, NULL);
    2512 
    2513     /*
    2514      * Try to allocate all the buffers and stuff we need.
    2515      */
    2516     pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
    2517     pReNative->paLabels  = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
    2518     pReNative->paFixups  = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
    2519 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2520     pReNative->pDbgInfo  = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
    2521 #endif
    2522     if (RT_LIKELY(   pReNative->pInstrBuf
    2523                   && pReNative->paLabels
    2524                   && pReNative->paFixups)
    2525 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2526         && pReNative->pDbgInfo
    2527 #endif
    2528        )
    2529     {
    2530         /*
    2531          * Set the buffer & array sizes on success.
    2532          */
    2533         pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
    2534         pReNative->cLabelsAlloc   = _8K;
    2535         pReNative->cFixupsAlloc   = _16K;
    2536 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2537         pReNative->cDbgInfoAlloc  = _16K;
    2538 #endif
    2539 
    2540         /*
    2541          * Done, just need to save it and reinit it.
    2542          */
    2543         pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
    2544         return iemNativeReInit(pReNative, pTb);
    2545     }
    2546 
    2547     /*
    2548      * Failed. Cleanup and return.
    2549      */
    2550     AssertFailed();
    2551     RTMemFree(pReNative->pInstrBuf);
    2552     RTMemFree(pReNative->paLabels);
    2553     RTMemFree(pReNative->paFixups);
    2554 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2555     RTMemFree(pReNative->pDbgInfo);
    2556 #endif
    2557     RTMemFree(pReNative);
    2558     return NULL;
    2559 }
    2560 
    2561 
    2562 /**
    2563  * Creates a label
    2564  *
    2565  * If the label does not yet have a defined position,
    2566  * call iemNativeLabelDefine() later to set it.
    2567  *
    2568  * @returns Label ID. Throws VBox status code on failure, so no need to check
    2569  *          the return value.
    2570  * @param   pReNative   The native recompile state.
    2571  * @param   enmType     The label type.
    2572  * @param   offWhere    The instruction offset of the label.  UINT32_MAX if the
    2573  *                      label is not yet defined (default).
    2574  * @param   uData       Data associated with the label. Only applicable to
    2575  *                      certain types of labels. Default is zero.
    2576  */
    2577 DECL_HIDDEN_THROW(uint32_t)
    2578 iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
    2579                      uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
    2580 {
    2581     Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
    2582 
    2583     /*
    2584      * Locate existing label definition.
    2585      *
    2586      * This is only allowed for forward declarations where offWhere=UINT32_MAX
    2587      * and uData is zero.
    2588      */
    2589     PIEMNATIVELABEL paLabels = pReNative->paLabels;
    2590     uint32_t const  cLabels  = pReNative->cLabels;
    2591     if (   pReNative->bmLabelTypes & RT_BIT_64(enmType)
    2592 #ifndef VBOX_STRICT
    2593         && enmType  <  kIemNativeLabelType_FirstWithMultipleInstances
    2594         && offWhere == UINT32_MAX
    2595         && uData    == 0
    2596 #endif
    2597         )
    2598     {
    2599 #ifndef VBOX_STRICT
    2600         AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
    2601                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    2602         uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
    2603         if (idxLabel < pReNative->cLabels)
    2604             return idxLabel;
    2605 #else
    2606         for (uint32_t i = 0; i < cLabels; i++)
    2607             if (   paLabels[i].enmType == enmType
    2608                 && paLabels[i].uData   == uData)
    2609             {
    2610                 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    2611                 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    2612                 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
    2613                 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
    2614                            IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    2615                 return i;
    2616             }
    2617         AssertStmt(   enmType >= kIemNativeLabelType_FirstWithMultipleInstances
    2618                    || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    2619 #endif
    2620     }
    2621 
    2622     /*
    2623      * Make sure we've got room for another label.
    2624      */
    2625     if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
    2626     { /* likely */ }
    2627     else
    2628     {
    2629         uint32_t cNew = pReNative->cLabelsAlloc;
    2630         AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
    2631         AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
    2632         cNew *= 2;
    2633         AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
    2634         paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
    2635         AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
    2636         pReNative->paLabels     = paLabels;
    2637         pReNative->cLabelsAlloc = cNew;
    2638     }
    2639 
    2640     /*
    2641      * Define a new label.
    2642      */
    2643     paLabels[cLabels].off     = offWhere;
    2644     paLabels[cLabels].enmType = enmType;
    2645     paLabels[cLabels].uData   = uData;
    2646     pReNative->cLabels = cLabels + 1;
    2647 
    2648     Assert((unsigned)enmType < 64);
    2649     pReNative->bmLabelTypes |= RT_BIT_64(enmType);
    2650 
    2651     if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
    2652     {
    2653         Assert(uData == 0);
    2654         pReNative->aidxUniqueLabels[enmType] = cLabels;
    2655     }
    2656 
    2657     if (offWhere != UINT32_MAX)
    2658     {
    2659 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2660         iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
    2661         iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
    2662 #endif
    2663     }
    2664     return cLabels;
    2665 }
    2666 
    2667 
    2668 /**
    2669  * Defines the location of an existing label.
    2670  *
    2671  * @param   pReNative   The native recompile state.
    2672  * @param   idxLabel    The label to define.
    2673  * @param   offWhere    The position.
    2674  */
    2675 DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
    2676 {
    2677     AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
    2678     PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
    2679     AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
    2680     pLabel->off = offWhere;
    2681 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2682     iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
    2683     iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
    2684 #endif
    2685 }
    2686 
    2687 
    2688 /**
    2689  * Looks up a label.
    2690  *
    2691  * @returns Label ID if found, UINT32_MAX if not.
    2692  */
    2693 static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
    2694                                    uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
    2695 {
    2696     Assert((unsigned)enmType < 64);
    2697     if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
    2698     {
    2699         if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
    2700             return pReNative->aidxUniqueLabels[enmType];
    2701 
    2702         PIEMNATIVELABEL paLabels = pReNative->paLabels;
    2703         uint32_t const  cLabels  = pReNative->cLabels;
    2704         for (uint32_t i = 0; i < cLabels; i++)
    2705             if (   paLabels[i].enmType == enmType
    2706                 && paLabels[i].uData   == uData
    2707                 && (   paLabels[i].off == offWhere
    2708                     || offWhere        == UINT32_MAX
    2709                     || paLabels[i].off == UINT32_MAX))
    2710                 return i;
    2711     }
    2712     return UINT32_MAX;
    2713 }
    2714 
    2715 
    2716 /**
    2717  * Adds a fixup.
    2718  *
    2719  * @throws  VBox status code (int) on failure.
    2720  * @param   pReNative   The native recompile state.
    2721  * @param   offWhere    The instruction offset of the fixup location.
    2722  * @param   idxLabel    The target label ID for the fixup.
    2723  * @param   enmType     The fixup type.
    2724  * @param   offAddend   Fixup addend if applicable to the type. Default is 0.
    2725  */
    2726 DECL_HIDDEN_THROW(void)
    2727 iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
    2728                   IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
    2729 {
    2730     Assert(idxLabel <= UINT16_MAX);
    2731     Assert((unsigned)enmType <= UINT8_MAX);
    2732 
    2733     /*
    2734      * Make sure we've got room for another fixup.
    2735      */
    2736     PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
    2737     uint32_t const  cFixups  = pReNative->cFixups;
    2738     if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
    2739     { /* likely */ }
    2740     else
    2741     {
    2742         uint32_t cNew = pReNative->cFixupsAlloc;
    2743         AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
    2744         AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
    2745         cNew *= 2;
    2746         AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
    2747         paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
    2748         AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
    2749         pReNative->paFixups     = paFixups;
    2750         pReNative->cFixupsAlloc = cNew;
    2751     }
    2752 
    2753     /*
    2754      * Add the fixup.
    2755      */
    2756     paFixups[cFixups].off       = offWhere;
    2757     paFixups[cFixups].idxLabel  = (uint16_t)idxLabel;
    2758     paFixups[cFixups].enmType   = enmType;
    2759     paFixups[cFixups].offAddend = offAddend;
    2760     pReNative->cFixups = cFixups + 1;
    2761 }
    2762 
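         /*
          * Rough usage sketch for the label and fixup helpers above; the label and
          * fixup types named here, and names like offJmp, are illustrative only.  A
          * jump target that is not yet known is created as a label without a
          * position, each branch emitted towards it records a fixup, and the label
          * gets its final position via iemNativeLabelDefine() once the target code
          * has been emitted; the recorded fixups are then resolved against the
          * defined label positions when the native code is finalized.
          *
          *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return);
          *      // ... emit a branch at native instruction offset offJmp ...
          *      iemNativeAddFixup(pReNative, offJmp, idxLabel, kIemNativeFixupType_Rel32);
          *      // ... later, when the jump target is reached:
          *      iemNativeLabelDefine(pReNative, idxLabel, off);
          */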
    2763 
    2764 /**
    2765  * Slow code path for iemNativeInstrBufEnsure.
    2766  */
    2767 DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
    2768 {
    2769     /* Double the buffer size till we meet the request. */
    2770     uint32_t cNew = pReNative->cInstrBufAlloc;
    2771     AssertReturn(cNew > 0, NULL);
    2772     do
    2773         cNew *= 2;
    2774     while (cNew < off + cInstrReq);
    2775 
    2776     uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
    2777 #ifdef RT_ARCH_ARM64
    2778     uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
    2779 #else
    2780     uint32_t const cbMaxInstrBuf = _2M;
    2781 #endif
    2782     AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
    2783 
    2784     void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
    2785     AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
    2786 
    2787     pReNative->cInstrBufAlloc   = cNew;
    2788     return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
    2789 }
    2790 
    2791 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2792 
    2793 /**
    2794  * Grows the static debug info array used during recompilation.
    2795  *
    2796  * @returns Pointer to the new debug info block; throws VBox status code on
    2797  *          failure, so no need to check the return value.
    2798  */
    2799 DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
    2800 {
    2801     uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
    2802     AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
    2803     pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
    2804     AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
    2805     pReNative->pDbgInfo      = pDbgInfo;
    2806     pReNative->cDbgInfoAlloc = cNew;
    2807     return pDbgInfo;
    2808 }
    2809 
    2810 
    2811 /**
    2812  * Adds a new, uninitialized debug info entry, returning the pointer to it.
    2813  */
    2814 DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
    2815 {
    2816     if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
    2817     { /* likely */ }
    2818     else
    2819         pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
    2820     return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
    2821 }
    2822 
    2823 
    2824 /**
    2825  * Debug Info: Adds a native offset record, if necessary.
    2826  */
    2827 static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    2828 {
    2829     PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
    2830 
    2831     /*
    2832      * Search backwards to see if we've got a similar record already.
    2833      */
    2834     uint32_t idx     = pDbgInfo->cEntries;
    2835     uint32_t idxStop = idx > 8 ? idx - 8 : 0;
    2836     while (idx-- > idxStop)
    2837         if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
    2838         {
    2839             if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
    2840                 return;
    2841             AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
    2842                        IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
    2843             break;
    2844         }
    2845 
    2846     /*
    2847      * Add it.
    2848      */
    2849     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
    2850     pEntry->NativeOffset.uType     = kIemTbDbgEntryType_NativeOffset;
    2851     pEntry->NativeOffset.offNative = off;
    2852 }
    2853 
    2854 
    2855 /**
    2856  * Debug Info: Record info about a label.
    2857  */
    2858 static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
    2859 {
    2860     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    2861     pEntry->Label.uType    = kIemTbDbgEntryType_Label;
    2862     pEntry->Label.uUnused  = 0;
    2863     pEntry->Label.enmLabel = (uint8_t)enmType;
    2864     pEntry->Label.uData    = uData;
    2865 }
    2866 
    2867 
    2868 /**
    2869  * Debug Info: Record info about a threaded call.
    2870  */
    2871 static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
    2872 {
    2873     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    2874     pEntry->ThreadedCall.uType       = kIemTbDbgEntryType_ThreadedCall;
    2875     pEntry->ThreadedCall.fRecompiled = fRecompiled;
    2876     pEntry->ThreadedCall.uUnused     = 0;
    2877     pEntry->ThreadedCall.enmCall     = (uint16_t)enmCall;
    2878 }
    2879 
    2880 
    2881 /**
    2882  * Debug Info: Record info about a new guest instruction.
    2883  */
    2884 static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
    2885 {
    2886     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    2887     pEntry->GuestInstruction.uType   = kIemTbDbgEntryType_GuestInstruction;
    2888     pEntry->GuestInstruction.uUnused = 0;
    2889     pEntry->GuestInstruction.fExec   = fExec;
    2890 }
    2891 
    2892 
    2893 /**
    2894  * Debug Info: Record info about guest register shadowing.
    2895  */
    2896 static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
    2897                                                  uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
    2898 {
    2899     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    2900     pEntry->GuestRegShadowing.uType         = kIemTbDbgEntryType_GuestRegShadowing;
    2901     pEntry->GuestRegShadowing.uUnused       = 0;
    2902     pEntry->GuestRegShadowing.idxGstReg     = enmGstReg;
    2903     pEntry->GuestRegShadowing.idxHstReg     = idxHstReg;
    2904     pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
    2905 }
    2906 
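         /*
          * Note on usage: the debug info is an append-only stream of entries.  The
          * helpers above are normally used in pairs, e.g. iemNativeLabelCreate() and
          * iemNativeRegMarkAsGstRegShadow() first call iemNativeDbgInfoAddNativeOffset()
          * to (re)establish the current native code offset and then append the actual
          * payload record (label, threaded call, guest instruction or register
          * shadowing) that applies at that offset.
          */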
    2907 #endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
    2908 
    2909 
    2910 /*********************************************************************************************************************************
    2911 *   Register Allocator                                                                                                           *
    2912 *********************************************************************************************************************************/
    2913 
    2914 /**
    2915  * Register parameter indexes (indexed by argument number).
    2916  */
    2917 DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
    2918 {
    2919     IEMNATIVE_CALL_ARG0_GREG,
    2920     IEMNATIVE_CALL_ARG1_GREG,
    2921     IEMNATIVE_CALL_ARG2_GREG,
    2922     IEMNATIVE_CALL_ARG3_GREG,
    2923 #if defined(IEMNATIVE_CALL_ARG4_GREG)
    2924     IEMNATIVE_CALL_ARG4_GREG,
    2925 # if defined(IEMNATIVE_CALL_ARG5_GREG)
    2926     IEMNATIVE_CALL_ARG5_GREG,
    2927 #  if defined(IEMNATIVE_CALL_ARG6_GREG)
    2928     IEMNATIVE_CALL_ARG6_GREG,
    2929 #   if defined(IEMNATIVE_CALL_ARG7_GREG)
    2930     IEMNATIVE_CALL_ARG7_GREG,
    2931 #   endif
    2932 #  endif
    2933 # endif
    2934 #endif
    2935 };
    2936 
    2937 /**
    2938  * Call register masks indexed by argument count.
    2939  */
    2940 DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
    2941 {
    2942     0,
    2943     RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
    2944     RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
    2945     RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
    2946       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    2947     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
    2948 #if defined(IEMNATIVE_CALL_ARG4_GREG)
    2949       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    2950     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
    2951 # if defined(IEMNATIVE_CALL_ARG5_GREG)
    2952       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    2953     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
    2954 #  if defined(IEMNATIVE_CALL_ARG6_GREG)
    2955       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    2956     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
    2957     | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
    2958 #   if defined(IEMNATIVE_CALL_ARG7_GREG)
    2959       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    2960     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
    2961     | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
    2962 #   endif
    2963 #  endif
    2964 # endif
    2965 #endif
    2966 };
    2967 
    2968 #ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    2969 /**
    2970  * BP offset of the stack argument slots.
    2971  *
    2972  * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
    2973  * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
    2974  */
    2975 DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
    2976 {
    2977     IEMNATIVE_FP_OFF_STACK_ARG0,
    2978 # ifdef IEMNATIVE_FP_OFF_STACK_ARG1
    2979     IEMNATIVE_FP_OFF_STACK_ARG1,
    2980 # endif
    2981 # ifdef IEMNATIVE_FP_OFF_STACK_ARG2
    2982     IEMNATIVE_FP_OFF_STACK_ARG2,
    2983 # endif
    2984 # ifdef IEMNATIVE_FP_OFF_STACK_ARG3
    2985     IEMNATIVE_FP_OFF_STACK_ARG3,
    2986 # endif
    2987 };
    2988 AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
    2989 #endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
    2990 
    2991 /**
    2992  * Info about shadowed guest register values.
    2993  * @see IEMNATIVEGSTREG
    2994  */
    2995 static struct
    2996 {
    2997     /** Offset in VMCPU. */
    2998     uint32_t    off;
    2999     /** The field size. */
    3000     uint8_t     cb;
    3001     /** Name (for logging). */
    3002     const char *pszName;
    3003 } const g_aGstShadowInfo[] =
    3004 {
    3005 #define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
    3006     /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */  { CPUMCTX_OFF_AND_SIZE(rax),                "rax", },
    3007     /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */  { CPUMCTX_OFF_AND_SIZE(rcx),                "rcx", },
    3008     /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */  { CPUMCTX_OFF_AND_SIZE(rdx),                "rdx", },
    3009     /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */  { CPUMCTX_OFF_AND_SIZE(rbx),                "rbx", },
    3010     /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */  { CPUMCTX_OFF_AND_SIZE(rsp),                "rsp", },
    3011     /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */  { CPUMCTX_OFF_AND_SIZE(rbp),                "rbp", },
    3012     /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */  { CPUMCTX_OFF_AND_SIZE(rsi),                "rsi", },
    3013     /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */  { CPUMCTX_OFF_AND_SIZE(rdi),                "rdi", },
    3014     /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */  { CPUMCTX_OFF_AND_SIZE(r8),                 "r8", },
    3015     /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */  { CPUMCTX_OFF_AND_SIZE(r9),                 "r9", },
    3016     /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */  { CPUMCTX_OFF_AND_SIZE(r10),                "r10", },
    3017     /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */  { CPUMCTX_OFF_AND_SIZE(r11),                "r11", },
    3018     /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */  { CPUMCTX_OFF_AND_SIZE(r12),                "r12", },
    3019     /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */  { CPUMCTX_OFF_AND_SIZE(r13),                "r13", },
    3020     /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */  { CPUMCTX_OFF_AND_SIZE(r14),                "r14", },
    3021     /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */  { CPUMCTX_OFF_AND_SIZE(r15),                "r15", },
    3022     /* [kIemNativeGstReg_Pc] = */                       { CPUMCTX_OFF_AND_SIZE(rip),                "rip", },
    3023     /* [kIemNativeGstReg_EFlags] = */                   { CPUMCTX_OFF_AND_SIZE(eflags),             "eflags", },
    3024     /* [kIemNativeGstReg_SegSelFirst + 0] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel),      "es", },
    3025     /* [kIemNativeGstReg_SegSelFirst + 1] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel),      "cs", },
    3026     /* [kIemNativeGstReg_SegSelFirst + 2] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel),      "ss", },
    3027     /* [kIemNativeGstReg_SegSelFirst + 3] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel),      "ds", },
    3028     /* [kIemNativeGstReg_SegSelFirst + 4] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel),      "fs", },
    3029     /* [kIemNativeGstReg_SegSelFirst + 5] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel),      "gs", },
    3030     /* [kIemNativeGstReg_SegBaseFirst + 0] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base),  "es_base", },
    3031     /* [kIemNativeGstReg_SegBaseFirst + 1] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base),  "cs_base", },
    3032     /* [kIemNativeGstReg_SegBaseFirst + 2] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base),  "ss_base", },
    3033     /* [kIemNativeGstReg_SegBaseFirst + 3] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base),  "ds_base", },
    3034     /* [kIemNativeGstReg_SegBaseFirst + 4] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base),  "fs_base", },
    3035     /* [kIemNativeGstReg_SegBaseFirst + 5] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base),  "gs_base", },
    3036     /* [kIemNativeGstReg_SegLimitFirst + 0] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
    3037     /* [kIemNativeGstReg_SegLimitFirst + 1] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
    3038     /* [kIemNativeGstReg_SegLimitFirst + 2] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
    3039     /* [kIemNativeGstReg_SegLimitFirst + 3] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
    3040     /* [kIemNativeGstReg_SegLimitFirst + 4] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
    3041     /* [kIemNativeGstReg_SegLimitFirst + 5] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
    3042 #undef CPUMCTX_OFF_AND_SIZE
    3043 };
    3044 AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
    3045 
    3046 
    3047 /** Host CPU general purpose register names. */
    3048 DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
    3049 {
    3050 #ifdef RT_ARCH_AMD64
    3051     "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
    3052 #elif RT_ARCH_ARM64
    3053     "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",  "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
    3054     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp",  "lr",  "sp/xzr",
    3055 #else
    3056 # error "port me"
    3057 #endif
    3058 };
    3059 
    3060 
    3061 DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
    3062                                                      IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
    3063 {
    3064     pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
    3065 
    3066     pReNative->Core.aHstRegs[idxReg].enmWhat        = enmWhat;
    3067     pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3068     pReNative->Core.aHstRegs[idxReg].idxVar         = idxVar;
    3069     return (uint8_t)idxReg;
    3070 }
    3071 
    3072 
    3073 /**
    3074  * Tries to locate a suitable register in the given register mask.
    3075  *
    3076  * This ASSUMES the caller has done the minimal/optimal allocation checks and
    3077  * failed.
    3078  *
    3079  * @returns Host register number on success, returns UINT8_MAX on failure.
    3080  */
    3081 static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
    3082 {
    3083     Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
    3084     uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
    3085     if (fRegs)
    3086     {
    3087         /** @todo pick better here:    */
    3088         unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
    3089 
    3090         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
    3091         Assert(   (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
    3092                == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    3093         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    3094 
    3095         pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    3096         pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    3097         pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3098         return idxReg;
    3099     }
    3100     return UINT8_MAX;
    3101 }
    3102 
    3103 
    3104 /**
    3105  * Locate a register, possibly freeing one up.
    3106  *
    3107  * This ASSUMES the caller has done the minimal/optimal allocation checks and
    3108  * failed.
    3109  *
    3110  * @returns Host register number on success. Returns UINT8_MAX if no register
    3111  *          was found; the caller is supposed to deal with this and raise an
    3112  *          allocation type specific status code (if desired).
    3113  *
    3114  * @throws  VBox status code if we run into trouble spilling a variable or
    3115  *          recording debug info.  Does NOT throw anything if we're out of
    3116  *          registers, though.
    3117  */
    3118 static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
    3119                                          uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
    3120 {
    3121     Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
    3122     Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
    3123 
    3124     /*
    3125      * Try a freed register that's shadowing a guest register
    3126      */
    3127     uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
    3128     if (fRegs)
    3129     {
    3130         unsigned const idxReg = (fPreferVolatile
    3131                                  ? ASMBitFirstSetU32(fRegs)
    3132                                  : ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3133                                                     ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
    3134                               - 1;
    3135 
    3136         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
    3137         Assert(   (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
    3138                == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    3139         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    3140 
    3141         pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    3142         pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    3143         pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3144         return idxReg;
    3145     }
    3146 
    3147     /*
    3148      * Try to free up a variable that's in a register.
    3149      *
    3150      * We do two rounds here: first evacuating variables that don't need to be
    3151      * saved on the stack, then in the second round moving things to the stack.
    3152      */
    3153     for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
    3154     {
    3155         uint32_t fVars = pReNative->Core.bmVars;
    3156         while (fVars)
    3157         {
    3158             uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
    3159             uint8_t const  idxReg = pReNative->Core.aVars[idxVar].idxReg;
    3160             if (   idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
    3161                 && (RT_BIT_32(idxReg) & fRegMask)
    3162                 && (  iLoop == 0
    3163                     ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
    3164                     : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    3165                 && !pReNative->Core.aVars[idxVar].fRegAcquired)
    3166             {
    3167                 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
    3168                 Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
    3169                        == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    3170                 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3171                 Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
    3172                        == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
    3173 
    3174                 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    3175                 {
    3176                     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    3177                     *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
    3178                 }
    3179 
    3180                 pReNative->Core.aVars[idxVar].idxReg    = UINT8_MAX;
    3181                 pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxReg);
    3182 
    3183                 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    3184                 pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    3185                 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3186                 return idxReg;
    3187             }
    3188             fVars &= ~RT_BIT_32(idxVar);
    3189         }
    3190     }
    3191 
    3192     return UINT8_MAX;
    3193 }
    3194 
    3195 
    3196 /**
    3197  * Reassigns a variable to a different register specified by the caller.
    3198  *
    3199  * @returns The new code buffer position.
    3200  * @param   pReNative       The native recompile state.
    3201  * @param   off             The current code buffer position.
    3202  * @param   idxVar          The variable index.
    3203  * @param   idxRegOld       The old host register number.
    3204  * @param   idxRegNew       The new host register number.
    3205  * @param   pszCaller       The caller for logging.
    3206  */
    3207 static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
    3208                                     uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
    3209 {
    3210     Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
    3211     RT_NOREF(pszCaller);
    3212 
    3213     iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
    3214 
    3215     uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
    3216     Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
    3217            pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
    3218     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
    3219 
    3220     pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
    3221     pReNative->Core.aHstRegs[idxRegNew].enmWhat        = kIemNativeWhat_Var;
    3222     pReNative->Core.aHstRegs[idxRegNew].idxVar         = idxVar;
    3223     if (fGstRegShadows)
    3224     {
    3225         pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
    3226                                                | RT_BIT_32(idxRegNew);
    3227         while (fGstRegShadows)
    3228         {
    3229             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    3230             fGstRegShadows &= ~RT_BIT_64(idxGstReg);
    3231 
    3232             Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
    3233             pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
    3234         }
    3235     }
    3236 
    3237     pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
    3238     pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
    3239     pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
    3240     return off;
    3241 }
    3242 
    3243 
    3244 /**
    3245  * Moves a variable to a different register or spills it onto the stack.
    3246  *
    3247  * This must be a stack variable (kIemNativeVarKind_Stack) because the other
    3248  * kinds can easily be recreated if needed later.
    3249  *
    3250  * @returns The new code buffer position.
    3251  * @param   pReNative       The native recompile state.
    3252  * @param   off             The current code buffer position.
    3253  * @param   idxVar          The variable index.
    3254  * @param   fForbiddenRegs  Mask of the forbidden registers.  Defaults to
    3255  *                          call-volatile registers.
    3256  */
    3257 static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
    3258                                                 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
    3259 {
    3260     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    3261     Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
    3262     Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
    3263 
    3264     uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
    3265     Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
    3266     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
    3267     Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
    3268     Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
    3269            == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
    3270     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3271     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
    3272            == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
    3273 
    3274 
    3275     /** @todo Add statistics on this.*/
    3276     /** @todo Implement basic variable liveness analysis (python) so variables
    3277      * can be freed immediately once no longer used.  As it stands, we may be
    3278      * trashing registers and stack slots for dead variables. */
    3279 
    3280     /*
    3281      * First try move it to a different register, as that's cheaper.
    3282      */
    3283     fForbiddenRegs |= RT_BIT_32(idxRegOld);
    3284     fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
    3285     uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
    3286     if (fRegs)
    3287     {
    3288         /* Avoid using shadow registers, if possible. */
    3289         if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
    3290             fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
    3291         unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
    3292         return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
    3293     }
    3294 
    3295     /*
    3296      * Otherwise we must spill the register onto the stack.
    3297      */
    3298     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    3299     Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
    3300            idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    3301     off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
    3302 
    3303     pReNative->Core.aVars[idxVar].idxReg    = UINT8_MAX;
    3304     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
    3305     pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxRegOld);
    3306     pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
    3307     pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
    3308     return off;
    3309 }
    3310 
    3311 
    3312 /**
    3313  * Allocates a temporary host general purpose register.
    3314  *
    3315  * This may emit code to save register content onto the stack in order to free
    3316  * up a register.
    3317  *
    3318  * @returns The host register number; throws VBox status code on failure,
    3319  *          so no need to check the return value.
    3320  * @param   pReNative       The native recompile state.
    3321  * @param   poff            Pointer to the variable with the code buffer position.
    3322  *                          This will be updated if we need to move a variable from
    3323  *                          a register to the stack in order to satisfy the request.
    3324  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    3325  *                          registers (@c true, default) or the other way around
    3326  *                          (@c false, for iemNativeRegAllocTmpForGuestReg()).
    3327  */
    3328 DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
    3329 {
    3330     /*
    3331      * Try to find a completely unused register, preferably a call-volatile one.
    3332      */
    3333     uint8_t  idxReg;
    3334     uint32_t fRegs = ~pReNative->Core.bmHstRegs
    3335                    & ~pReNative->Core.bmHstRegsWithGstShadow
    3336                    & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
    3337     if (fRegs)
    3338     {
    3339         if (fPreferVolatile)
    3340             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3341                                                 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    3342         else
    3343             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3344                                                 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    3345         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    3346         Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    3347     }
    3348     else
    3349     {
    3350         idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
    3351         AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
    3352     }
    3353     return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
    3354 }
    3355 
    3356 
    3357 /**
    3358  * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
    3359  * registers.
    3360  *
    3361  * @returns The host register number; throws VBox status code on failure,
    3362  *          so no need to check the return value.
    3363  * @param   pReNative       The native recompile state.
    3364  * @param   poff            Pointer to the variable with the code buffer position.
    3365  *                          This will be updated if we need to move a variable from
    3366  *                          a register to the stack in order to satisfy the request.
    3367  * @param   fRegMask        Mask of acceptable registers.
    3368  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    3369  *                          registers (@c true, default) or the other way around
    3370  *                          (@c false, for iemNativeRegAllocTmpForGuestReg()).
    3371  */
    3372 DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
    3373                                                   bool fPreferVolatile /*= true*/)
    3374 {
    3375     Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
    3376     Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
    3377 
    3378     /*
    3379      * Try to find a completely unused register, preferably a call-volatile one.
    3380      */
    3381     uint8_t  idxReg;
    3382     uint32_t fRegs = ~pReNative->Core.bmHstRegs
    3383                    & ~pReNative->Core.bmHstRegsWithGstShadow
    3384                    & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
    3385                    & fRegMask;
    3386     if (fRegs)
    3387     {
    3388         if (fPreferVolatile)
    3389             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3390                                                 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    3391         else
    3392             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3393                                                 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    3394         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    3395         Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    3396     }
    3397     else
    3398     {
    3399         idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
    3400         AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
    3401     }
    3402     return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
    3403 }
    3404 
    3405 
    3406 /**
    3407  * Allocates a temporary register for loading an immediate value into.
    3408  *
    3409  * This will emit code to load the immediate, unless there happens to be an
    3410  * unused register with the value already loaded.
    3411  *
    3412  * The caller must not modify the returned register; it is to be considered
    3413  * read-only.  Free it using iemNativeRegFreeTmpImm.
    3414  *
    3415  * @returns The host register number; throws VBox status code on failure, so no
    3416  *          need to check the return value.
    3417  * @param   pReNative       The native recompile state.
    3418  * @param   poff            Pointer to the variable with the code buffer position.
    3419  * @param   uImm            The immediate value that the register must hold upon
    3420  *                          return.
    3421  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    3422  *                          registers (@c true, default) or the other way around
    3423  *                          (@c false).
    3424  *
    3425  * @note    Reusing immediate values has not been implemented yet.
    3426  */
    3427 DECL_HIDDEN_THROW(uint8_t)
    3428 iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
    3429 {
    3430     uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
    3431     *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
    3432     return idxReg;
    3433 }
    3434 
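         /*
          * Minimal usage sketch for the temporary register allocators above; the
          * immediate value and the code emitted around it are illustrative only:
          *
          *      uint8_t const idxRegTmp = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0x1000));
          *      // ... emit instructions reading idxRegTmp, advancing off ...
          *      iemNativeRegFreeTmpImm(pReNative, idxRegTmp);
          *
          * The allocation itself may emit code (spilling a variable to the stack or
          * loading the immediate), which is why the current code buffer position is
          * passed by pointer and must be used for subsequent emitting.
          */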
    3435 
    3436 /**
    3437  * Marks host register @a idxHstReg as containing a shadow copy of guest
    3438  * register @a enmGstReg.
    3439  *
    3440  * ASSUMES that the caller has made sure @a enmGstReg is not associated with any
    3441  * host register before calling.
    3442  */
    3443 DECL_FORCE_INLINE(void)
    3444 iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
    3445 {
    3446     Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
    3447     Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    3448     Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
    3449 
    3450     pReNative->Core.aidxGstRegShadows[enmGstReg]       = idxHstReg;
    3451     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
    3452     pReNative->Core.bmGstRegShadows                   |= RT_BIT_64(enmGstReg);
    3453     pReNative->Core.bmHstRegsWithGstShadow            |= RT_BIT_32(idxHstReg);
    3454 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3455     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    3456     iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
    3457 #else
    3458     RT_NOREF(off);
    3459 #endif
    3460 }
    3461 
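         /*
          * The bookkeeping maintained here and in the clearing helpers below follows
          * a simple invariant (also checked by the assertions): every guest register
          * bit set in Core.bmGstRegShadows has exactly one host register recorded in
          * Core.aidxGstRegShadows[], that host register has the same guest register
          * bit set in its fGstRegShadows mask and its own bit set in
          * Core.bmHstRegsWithGstShadow.  A host register may shadow several guest
          * registers at once, but a guest register is never shadowed by more than one
          * host register.
          */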
    3462 
    3463 /**
    3464  * Clear any guest register shadow claims from @a idxHstReg.
    3465  *
    3466  * The register does not need to be shadowing any guest registers.
    3467  */
    3468 DECL_FORCE_INLINE(void)
    3469 iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
    3470 {
    3471     Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    3472               == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
    3473            && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3474     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
    3475            == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    3476 
    3477 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3478     uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    3479     if (fGstRegs)
    3480     {
    3481         Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
    3482         iemNativeDbgInfoAddNativeOffset(pReNative, off);
    3483         while (fGstRegs)
    3484         {
    3485             unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    3486             fGstRegs &= ~RT_BIT_64(iGstReg);
    3487             iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
    3488         }
    3489     }
    3490 #else
    3491     RT_NOREF(off);
    3492 #endif
    3493 
    3494     pReNative->Core.bmHstRegsWithGstShadow            &= ~RT_BIT_32(idxHstReg);
    3495     pReNative->Core.bmGstRegShadows                   &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    3496     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    3497 }
    3498 
    3499 
    3500 /**
    3501  * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
    3502  * and global overview flags.
    3503  */
    3504 DECL_FORCE_INLINE(void)
    3505 iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
    3506 {
    3507     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3508     Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    3509               == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
    3510            && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3511     Assert(pReNative->Core.bmGstRegShadows                    & RT_BIT_64(enmGstReg));
    3512     Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
    3513     Assert(pReNative->Core.bmHstRegsWithGstShadow             & RT_BIT_32(idxHstReg));
    3514 
    3515 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3516     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    3517     iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
    3518 #else
    3519     RT_NOREF(off);
    3520 #endif
    3521 
    3522     uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
    3523     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
    3524     if (!fGstRegShadowsNew)
    3525         pReNative->Core.bmHstRegsWithGstShadow        &= ~RT_BIT_32(idxHstReg);
    3526     pReNative->Core.bmGstRegShadows                   &= ~RT_BIT_64(enmGstReg);
    3527 }
    3528 
    3529 
    3530 /**
    3531  * Clear any guest register shadow claim for @a enmGstReg.
    3532  */
    3533 DECL_FORCE_INLINE(void)
    3534 iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
    3535 {
    3536     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3537     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    3538     {
    3539         Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
    3540         iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
    3541     }
    3542 }
    3543 
    3544 
    3545 /**
    3546  * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
    3547  * as the new shadow of it.
    3548  */
    3549 DECL_FORCE_INLINE(void)
    3550 iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
    3551                                        IEMNATIVEGSTREG enmGstReg, uint32_t off)
    3552 {
    3553     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3554     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    3555     {
    3556         Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
    3557         if (pReNative->Core.aidxGstRegShadows[enmGstReg] == idxHstRegNew)
    3558             return;
    3559         iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
    3560     }
    3561     iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
    3562 }
    3563 
    3564 
    3565 /**
    3566  * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
    3567  * to @a idxRegTo.
    3568  */
    3569 DECL_FORCE_INLINE(void)
    3570 iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
    3571                                     IEMNATIVEGSTREG enmGstReg, uint32_t off)
    3572 {
    3573     Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
    3574     Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
    3575     Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
    3576               == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
    3577            && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3578     Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
    3579            == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
    3580     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
    3581            == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
    3582 
    3583     uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
    3584     pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows  = fGstRegShadowsFrom;
    3585     if (!fGstRegShadowsFrom)
    3586         pReNative->Core.bmHstRegsWithGstShadow          &= ~RT_BIT_32(idxRegFrom);
    3587     pReNative->Core.bmHstRegsWithGstShadow              |= RT_BIT_32(idxRegTo);
    3588     pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows   |= RT_BIT_64(enmGstReg);
    3589     pReNative->Core.aidxGstRegShadows[enmGstReg]         = idxRegTo;
    3590 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3591     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    3592     iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
    3593 #else
    3594     RT_NOREF(off);
    3595 #endif
    3596 }
    3597 
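/*
 * Informal invariant sketch for the shadow tracking state manipulated by the
 * helpers above (assuming the Core state layout used throughout this file):
 *
 *      // For a host register idxHst and a guest register enmGst:
 *      //   aHstRegs[idxHst].fGstRegShadows has bit enmGst set
 *      //       <=> aidxGstRegShadows[enmGst] == idxHst
 *      //       <=> bmGstRegShadows has bit enmGst set (owned by idxHst)
 *      //   bmHstRegsWithGstShadow has bit idxHst set
 *      //       <=> aHstRegs[idxHst].fGstRegShadows != 0
 *
 * iemNativeRegAssertSanity() further down verifies exactly this in strict builds.
 */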
    3598 
    3599 /**
    3600  * Allocates a temporary host general purpose register for keeping a guest
    3601  * register value.
    3602  *
    3603  * Since we may already have a register holding the guest register value,
    3604  * code will be emitted to do the loading if that's not the case. Code may also
    3605  * be emitted if we have to free up a register to satisfy the request.
    3606  *
    3607  * @returns The host register number; throws VBox status code on failure, so no
    3608  *          need to check the return value.
    3609  * @param   pReNative       The native recompile state.
    3610  * @param   poff            Pointer to the variable with the code buffer
    3611  *                          position. This will be updated if we need to move a
    3612  *                          variable from register to stack in order to satisfy
    3613  *                          the request.
    3614  * @param   enmGstReg       The guest register that is to be updated.
    3615  * @param   enmIntendedUse  How the caller will be using the host register.
    3616  * @sa      iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
    3617  */
    3618 DECL_HIDDEN_THROW(uint8_t)
    3619 iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
    3620                                 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
    3621 {
    3622     Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
    3623 #if defined(LOG_ENABLED) || defined(VBOX_STRICT)
    3624     static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
    3625 #endif
    3626 
    3627     /*
    3628      * First check if the guest register value is already in a host register.
    3629      */
    3630     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    3631     {
    3632         uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
    3633         Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    3634         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
    3635         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    3636 
    3637         if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
    3638         {
    3639             /*
    3640              * If the register will trash the guest shadow copy, try to find a
    3641              * completely unused register we can use instead.  If that fails,
    3642              * we need to disassociate the host reg from the guest reg.
    3643              */
    3644             /** @todo would be nice to know if preserving the register is in any way helpful. */
    3645             if (   enmIntendedUse == kIemNativeGstRegUse_Calculation
    3646                 && (  ~pReNative->Core.bmHstRegs
    3647                     & ~pReNative->Core.bmHstRegsWithGstShadow
    3648                     & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
    3649             {
    3650                 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
    3651 
    3652                 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
    3653 
    3654                 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
    3655                        g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    3656                        g_apszIemNativeHstRegNames[idxRegNew]));
    3657                 idxReg = idxRegNew;
    3658             }
    3659             else
    3660             {
    3661                 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
    3662                 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
    3663                 pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    3664                 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    3665                     Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
    3666                            g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    3667                 else
    3668                 {
    3669                     iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    3670                     Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
    3671                            g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
    3672                 }
    3673             }
    3674         }
    3675         else
    3676         {
    3677             AssertMsg(   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
    3678                       && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
    3679                       ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
    3680                        idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
    3681 
    3682             /*
    3683              * Allocate a new register, copy the value and, if updating, the
    3684              * guest shadow copy assignment to the new register.
    3685              */
    3686             /** @todo share register for readonly access. */
    3687             uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
    3688 
    3689             if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    3690                 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
    3691 
    3692             if (   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
    3693                 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    3694                 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
    3695                        g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    3696                        g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
    3697             else
    3698             {
    3699                 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
    3700                 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
    3701                        g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    3702                        g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
    3703             }
    3704             idxReg = idxRegNew;
    3705         }
    3706 
    3707 #ifdef VBOX_STRICT
    3708         /* Strict builds: Check that the value is correct. */
    3709         *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
    3710 #endif
    3711 
    3712         return idxReg;
    3713     }
    3714 
    3715     /*
    3716      * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
    3717      */
    3718     uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
    3719 
    3720     if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    3721         *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
    3722 
    3723     if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    3724         iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
    3725     Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
    3726            g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    3727 
    3728     return idxRegNew;
    3729 }
    3730 
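/*
 * Usage sketch (hypothetical emitter fragment, not taken from this changeset;
 * the guest register chosen here is only an example):
 *
 *      uint8_t const idxRegRip = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
 *                                                                kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code updating idxRegRip; the shadow association is kept ...
 *      iemNativeRegFreeTmp(pReNative, idxRegRip);
 *
 * With kIemNativeGstRegUse_Calculation the shadow association is dropped
 * instead, so the register contents may be trashed freely.
 */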
    3731 
    3732 /**
    3733  * Allocates a temporary host general purpose register that already holds the
    3734  * given guest register value.
    3735  *
    3736  * The use case for this function is places where the shadowing state cannot be
    3737  * modified due to branching and such.  This will fail if we don't have a
    3738  * current shadow copy handy or if it's incompatible.  The only code that will
    3739  * be emitted here is value checking code in strict builds.
    3740  *
    3741  * The intended use can only be readonly!
    3742  *
    3743  * @returns The host register number, UINT8_MAX if not present.
    3744  * @param   pReNative       The native recompile state.
    3745  * @param   poff            Pointer to the instruction buffer offset.
    3746  *                          Will be updated in strict builds if a register is
    3747  *                          found.
    3748  * @param   enmGstReg       The guest register that is to be updated.
    3749  * @note    In strict builds, this may throw instruction buffer growth failures.
    3750  *          Non-strict builds will not throw anything.
    3751  * @sa iemNativeRegAllocTmpForGuestReg
    3752  */
    3753 DECL_HIDDEN_THROW(uint8_t)
    3754 iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
    3755 {
    3756     Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
    3757 
    3758     /*
    3759      * First check if the guest register value is already in a host register.
    3760      */
    3761     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    3762     {
    3763         uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
    3764         Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    3765         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
    3766         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    3767 
    3768         if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
    3769         {
    3770             /*
    3771              * We only do readonly use here, so easy compared to the other
    3772              * variant of this code.
    3773              */
    3774             pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
    3775             pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
    3776             pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    3777             Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
    3778                    g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
    3779 
    3780 #ifdef VBOX_STRICT
    3781             /* Strict builds: Check that the value is correct. */
    3782             *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
    3783 #else
    3784             RT_NOREF(poff);
    3785 #endif
    3786             return idxReg;
    3787         }
    3788     }
    3789 
    3790     return UINT8_MAX;
    3791 }
    3792 
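/*
 * Usage sketch (hypothetical, illustrating the UINT8_MAX fallback contract):
 *
 *      uint8_t const idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
 *                                                                                kIemNativeGstReg_EFlags);
 *      if (idxRegEfl != UINT8_MAX)
 *      {
 *          // read-only use of the existing shadow copy; shadowing state is untouched
 *          iemNativeRegFreeTmp(pReNative, idxRegEfl);
 *      }
 *      else
 *      {
 *          // no usable shadow copy - fall back to a path that may modify shadowing state
 *      }
 */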
    3793 
    3794 DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
    3795 
    3796 
    3797 /**
    3798  * Allocates argument registers for a function call.
    3799  *
    3800  * @returns New code buffer offset on success; throws VBox status code on failure, so no
    3801  *          need to check the return value.
    3802  * @param   pReNative   The native recompile state.
    3803  * @param   off         The current code buffer offset.
    3804  * @param   cArgs       The number of arguments the function call takes.
    3805  */
    3806 DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
    3807 {
    3808     AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
    3809                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
    3810     Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
    3811     Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
    3812 
    3813     if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
    3814         cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
    3815     else if (cArgs == 0)
    3816         return true;
    3817 
    3818     /*
    3819      * Do we get lucky and all registers are free and not shadowing anything?
    3820      */
    3821     if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
    3822         for (uint32_t i = 0; i < cArgs; i++)
    3823         {
    3824             uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
    3825             pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
    3826             pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    3827             Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    3828         }
    3829     /*
    3830      * Okay, not lucky so we have to free up the registers.
    3831      */
    3832     else
    3833         for (uint32_t i = 0; i < cArgs; i++)
    3834         {
    3835             uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
    3836             if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
    3837             {
    3838                 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
    3839                 {
    3840                     case kIemNativeWhat_Var:
    3841                     {
    3842                         uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
    3843                         AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
    3844                                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
    3845                         Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
    3846                         Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
    3847 
    3848                         if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
    3849                             pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    3850                         else
    3851                         {
    3852                             off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
    3853                             Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    3854                         }
    3855                         break;
    3856                     }
    3857 
    3858                     case kIemNativeWhat_Tmp:
    3859                     case kIemNativeWhat_Arg:
    3860                     case kIemNativeWhat_rc:
    3861                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
    3862                     default:
    3863                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
    3864                 }
    3865 
    3866             }
    3867             if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
    3868             {
    3869                 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
    3870                 Assert(   (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
    3871                        == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    3872                 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    3873                 pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    3874                 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3875             }
    3876             else
    3877                 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    3878             pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
    3879             pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    3880         }
    3881     pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
    3882     return true;
    3883 }
    3884 
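/*
 * Orientation note (informal; exact register choices depend on the host ABI
 * tables, this is not lifted from them): g_aidxIemNativeCallRegs[] maps the
 * argument index to the host register prescribed by the calling convention,
 * and g_afIemNativeCallRegs[cArgs] is the cumulative mask of the first cArgs
 * of those registers.  E.g. on a SysV AMD64 host one would expect roughly:
 *
 *      // arg0 -> rdi, arg1 -> rsi, arg2 -> rdx, arg3 -> rcx, ...
 *      // g_afIemNativeCallRegs[2] == RT_BIT_32(X86_GREG_xDI) | RT_BIT_32(X86_GREG_xSI)
 *
 * Arguments beyond IEMNATIVE_CALL_ARG_GREG_COUNT go onto the stack frame.
 */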
    3885 
    3886 DECL_HIDDEN_THROW(uint8_t)  iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
    3887 
    3888 
    3889 #if 0
    3890 /**
    3891  * Frees a register assignment of any type.
    3892  *
    3893  * @param   pReNative       The native recompile state.
    3894  * @param   idxHstReg       The register to free.
    3895  *
    3896  * @note    Does not update variables.
    3897  */
    3898 DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
    3899 {
    3900     Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    3901     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
    3902     Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
    3903     Assert(   pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
    3904            || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
    3905            || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
    3906            || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
    3907     Assert(   pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
    3908            || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
    3909            || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
    3910     Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    3911            == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    3912     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
    3913            == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    3914 
    3915     pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxHstReg);
    3916     /* no flushing, right:
    3917     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    3918     pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    3919     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    3920     */
    3921 }
    3922 #endif
    3923 
    3924 
    3925 /**
    3926  * Frees a temporary register.
    3927  *
    3928  * Any shadow copies of guest registers assigned to the host register will not
    3929  * be flushed by this operation.
    3930  */
    3931 DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
    3932 {
    3933     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
    3934     Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
    3935     pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    3936     Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
    3937            g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    3938 }
    3939 
    3940 
    3941 /**
    3942  * Frees a temporary immediate register.
    3943  *
    3944  * It is assumed that the caller has not modified the register, so it still holds
    3945  * the same value as when it was allocated via iemNativeRegAllocTmpImm().
    3946  */
    3947 DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
    3948 {
    3949     iemNativeRegFreeTmp(pReNative, idxHstReg);
    3950 }
    3951 
    3952 
    3953 /**
    3954  * Frees a register assigned to a variable.
    3955  *
    3956  * The register will be disassociated from the variable.
    3957  */
    3958 DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
    3959 {
    3960     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
    3961     Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
    3962     uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
    3963     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    3964     Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
    3965 
    3966     pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    3967     pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    3968     if (!fFlushShadows)
    3969         Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
    3970                g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
    3971     else
    3972     {
    3973         pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    3974         uint64_t const fGstRegShadowsOld        = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    3975         pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    3976         pReNative->Core.bmGstRegShadows        &= ~fGstRegShadowsOld;
    3977         uint64_t       fGstRegShadows           = fGstRegShadowsOld;
    3978         while (fGstRegShadows)
    3979         {
    3980             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    3981             fGstRegShadows &= ~RT_BIT_64(idxGstReg);
    3982 
    3983             Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
    3984             pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
    3985         }
    3986         Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
    3987                g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
    3988     }
    3989 }
    3990 
    3991 
    3992 /**
    3993  * Called right before emitting a call instruction to move anything important
    3994  * out of call-volatile registers, free and flush the call-volatile registers,
    3995  * optionally freeing argument variables.
    3996  *
    3997  * @returns New code buffer offset, UINT32_MAX on failure.
    3998  * @param   pReNative       The native recompile state.
    3999  * @param   off             The code buffer offset.
    4000  * @param   cArgs           The number of arguments the function call takes.
    4001  *                          It is presumed that the host register part of these has
    4002  *                          been allocated as such already and won't need moving,
    4003  *                          just freeing.
    4004  * @param   fKeepVars       Mask of variables that should keep their register
    4005  *                          assignments.  Caller must take care to handle these.
    4006  */
    4007 DECL_HIDDEN_THROW(uint32_t)
    4008 iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars = 0)
    4009 {
    4010     Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
    4011 
    4012     /* fKeepVars will reduce this mask. */
    4013     uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    4014 
    4015     /*
    4016      * Move anything important out of volatile registers.
    4017      */
    4018     if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
    4019         cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
    4020     uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
    4021 #ifdef IEMNATIVE_REG_FIXED_TMP0
    4022                          & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
    4023 #endif
    4024                          & ~g_afIemNativeCallRegs[cArgs];
    4025 
    4026     fRegsToMove &= pReNative->Core.bmHstRegs;
    4027     if (!fRegsToMove)
    4028     { /* likely */ }
    4029     else
    4030     {
    4031         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
    4032         while (fRegsToMove != 0)
    4033         {
    4034             unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
    4035             fRegsToMove &= ~RT_BIT_32(idxReg);
    4036 
    4037             switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
    4038             {
    4039                 case kIemNativeWhat_Var:
    4040                 {
    4041                     uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
    4042                     Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
    4043                     Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
    4044                     Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
    4045                     if (!(RT_BIT_32(idxVar) & fKeepVars))
    4046                     {
    4047                         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
    4048                                idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
    4049                         if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
    4050                             pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    4051                         else
    4052                             off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
    4053                     }
    4054                     else
    4055                         fRegsToFree &= ~RT_BIT_32(idxReg);
    4056                     continue;
    4057                 }
    4058 
    4059                 case kIemNativeWhat_Arg:
    4060                     AssertMsgFailed(("What?!?: %u\n", idxReg));
    4061                     continue;
    4062 
    4063                 case kIemNativeWhat_rc:
    4064                 case kIemNativeWhat_Tmp:
    4065                     AssertMsgFailed(("Missing free: %u\n", idxReg));
    4066                     continue;
    4067 
    4068                 case kIemNativeWhat_FixedTmp:
    4069                 case kIemNativeWhat_pVCpuFixed:
    4070                 case kIemNativeWhat_pCtxFixed:
    4071                 case kIemNativeWhat_FixedReserved:
    4072                 case kIemNativeWhat_Invalid:
    4073                 case kIemNativeWhat_End:
    4074                     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
    4075             }
    4076             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
    4077         }
    4078     }
    4079 
    4080     /*
    4081      * Do the actual freeing.
    4082      */
    4083     if (pReNative->Core.bmHstRegs & fRegsToFree)
    4084         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
    4085                pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
    4086     pReNative->Core.bmHstRegs &= ~fRegsToFree;
    4087 
    4088     /* If there are guest register shadows in any call-volatile register, we
    4089        have to clear the corresponding guest register masks for each register. */
    4090     uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
    4091     if (fHstRegsWithGstShadow)
    4092     {
    4093         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
    4094                pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
    4095         pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
    4096         do
    4097         {
    4098             unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
    4099             fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    4100 
    4101             AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
    4102             pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    4103             pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    4104         } while (fHstRegsWithGstShadow != 0);
    4105     }
    4106 
    4107     return off;
    4108 }
    4109 
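/*
 * Usage sketch of a typical call sequence (hypothetical fragment; real call
 * emitters load the argument registers and do additional bookkeeping between
 * these steps):
 *
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2);   // two register arguments
 *      // ... load IEMNATIVE_CALL_ARG0_GREG / IEMNATIVE_CALL_ARG1_GREG ...
 *      // ... emit the call instruction itself ...
 *      iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);             // callee may have changed guest state
 */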
    4110 
    4111 /**
    4112  * Flushes a set of guest register shadow copies.
    4113  *
    4114  * This is usually done after calling a threaded function or a C-implementation
    4115  * of an instruction.
    4116  *
    4117  * @param   pReNative       The native recompile state.
    4118  * @param   fGstRegs        Set of guest registers to flush.
    4119  */
    4120 DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
    4121 {
    4122     /*
    4123      * Reduce the mask by what's currently shadowed
    4124      */
    4125     uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
    4126     fGstRegs &= bmGstRegShadowsOld;
    4127     if (fGstRegs)
    4128     {
    4129         uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
    4130         Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
    4131         pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
    4132         if (bmGstRegShadowsNew)
    4133         {
    4134             /*
    4135              * Partial.
    4136              */
    4137             do
    4138             {
    4139                 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    4140                 uint8_t const  idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    4141                 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
    4142                 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
    4143                 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    4144 
    4145                 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
    4146                 fGstRegs &= ~fInThisHstReg;
    4147                 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
    4148                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
    4149                 if (!fGstRegShadowsNew)
    4150                     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    4151             } while (fGstRegs != 0);
    4152         }
    4153         else
    4154         {
    4155             /*
    4156              * Clear all.
    4157              */
    4158             do
    4159             {
    4160                 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    4161                 uint8_t const  idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    4162                 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
    4163                 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
    4164                 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    4165 
    4166                 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
    4167                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4168             } while (fGstRegs != 0);
    4169             pReNative->Core.bmHstRegsWithGstShadow = 0;
    4170         }
    4171     }
    4172 }
    4173 
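/*
 * Usage sketch (hypothetical; the mask is an example - callers derive it from
 * what the just-called code may have modified):
 *
 *      iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags));
 *
 * Passing UINT64_MAX flushes every currently shadowed guest register, which is
 * the safe choice after calling a C implementation that may touch arbitrary state.
 */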
    4174 
    4175 /**
    4176  * Flushes delayed write of a specific guest register.
    4177  *
    4178  * This must be called prior to calling CImpl functions and any helpers that use
    4179  * the guest state (like raising exceptions) and such.
    4180  *
    4181  * This optimization has not yet been implemented.  The first target would be
    4182  * RIP updates, since these are the most common ones.
    4183  */
    4184 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    4185                                                                   IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
    4186 {
    4187     RT_NOREF(pReNative, enmClass, idxReg);
    4188     return off;
    4189 }
    4190 
    4191 
    4192 /**
    4193  * Flushes any delayed guest register writes.
    4194  *
    4195  * This must be called prior to calling CImpl functions and any helpers that use
    4196  * the guest state (like raising exceptions) and such.
    4197  *
    4198  * This optimization has not yet been implemented.  The first target would be
    4199  * RIP updates, since these are the most common ones.
    4200  */
    4201 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    4202 {
    4203     RT_NOREF(pReNative, off);
    4204     return off;
    4205 }
    4206 
    4207 
    4208 #ifdef VBOX_STRICT
    4209 /**
    4210  * Does internal register allocator sanity checks.
    4211  * Performs internal register allocator sanity checks.
    4212 static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
    4213 {
    4214     /*
    4215      * Iterate host registers building a guest shadowing set.
    4216      */
    4217     uint64_t bmGstRegShadows        = 0;
    4218     uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
    4219     AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
    4220     while (bmHstRegsWithGstShadow)
    4221     {
    4222         unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
    4223         Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4224         bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    4225 
    4226         uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4227         AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
    4228         AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
    4229         bmGstRegShadows |= fThisGstRegShadows;
    4230         while (fThisGstRegShadows)
    4231         {
    4232             unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
    4233             fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
    4234             AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
    4235                       ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
    4236                        idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
    4237         }
    4238     }
    4239     AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
    4240               ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
    4241                bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
    4242 
    4243     /*
    4244      * Now the other way around, checking the guest to host index array.
    4245      */
    4246     bmHstRegsWithGstShadow = 0;
    4247     bmGstRegShadows        = pReNative->Core.bmGstRegShadows;
    4248     Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4249     while (bmGstRegShadows)
    4250     {
    4251         unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
    4252         Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
    4253         bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
    4254 
    4255         uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    4256         AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
    4257         AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
    4258                   ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
    4259                    idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    4260         bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
    4261     }
    4262     AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
    4263               ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
    4264                bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
    4265 }
    4266 #endif
    4267 
    4268 
    4269 /*********************************************************************************************************************************
    4270 *   Code Emitters (larger snippets)                                                                                              *
    4271 *********************************************************************************************************************************/
    4272 
    4273 /**
    4274  * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
    4275  * extending to 64-bit width.
    4276  *
    4277  * @returns New code buffer offset on success, UINT32_MAX on failure.
    4278  * @param   pReNative   The native recompile state.
    4279  * @param   off         The current code buffer position.
    4280  * @param   idxHstReg   The host register to load the guest register value into.
    4281  * @param   enmGstReg   The guest register to load.
    4282  *
    4283  * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
    4284  *       that is something the caller needs to do if applicable.
    4285  */
    4286 DECL_HIDDEN_THROW(uint32_t)
    4287 iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
    4288 {
    4289     Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
    4290     Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
    4291 
    4292     switch (g_aGstShadowInfo[enmGstReg].cb)
    4293     {
    4294         case sizeof(uint64_t):
    4295             return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    4296         case sizeof(uint32_t):
    4297             return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    4298         case sizeof(uint16_t):
    4299             return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    4300 #if 0 /* not present in the table. */
    4301         case sizeof(uint8_t):
    4302             return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    4303 #endif
    4304         default:
    4305             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
    4306     }
    4307 }
    4308 
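/*
 * Usage sketch (hypothetical; the host register and guest register choices are
 * only examples):
 *
 *      off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);
 *      // IEMNATIVE_REG_FIXED_TMP0 now holds the CPUMCTX value but is NOT
 *      // registered as a shadow copy - see the @note in the function docs.
 */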
    4309 
    4310 #ifdef VBOX_STRICT
    4311 /**
    4312  * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
    4313  *
    4314  * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
    4315  *       Trashes EFLAGS on AMD64.
    4316  */
    4317 static uint32_t
    4318 iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
    4319 {
    4320 # ifdef RT_ARCH_AMD64
    4321     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
    4322 
    4323     /* rol reg64, 32 */
    4324     pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
    4325     pbCodeBuf[off++] = 0xc1;
    4326     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    4327     pbCodeBuf[off++] = 32;
    4328 
    4329     /* test reg32, ffffffffh */
    4330     if (idxReg >= 8)
    4331         pbCodeBuf[off++] = X86_OP_REX_B;
    4332     pbCodeBuf[off++] = 0xf7;
    4333     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    4334     pbCodeBuf[off++] = 0xff;
    4335     pbCodeBuf[off++] = 0xff;
    4336     pbCodeBuf[off++] = 0xff;
    4337     pbCodeBuf[off++] = 0xff;
    4338 
    4339     /* je/jz +1 */
    4340     pbCodeBuf[off++] = 0x74;
    4341     pbCodeBuf[off++] = 0x01;
    4342 
    4343     /* int3 */
    4344     pbCodeBuf[off++] = 0xcc;
    4345 
    4346     /* rol reg64, 32 */
    4347     pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
    4348     pbCodeBuf[off++] = 0xc1;
    4349     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    4350     pbCodeBuf[off++] = 32;
    4351 
    4352 # elif defined(RT_ARCH_ARM64)
    4353     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    4354     /* lsr tmp0, reg64, #32 */
    4355     pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
    4356     /* cbz tmp0, +1 */
    4357     pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    4358     /* brk #0x1100 */
    4359     pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
    4360 
    4361 # else
    4362 #  error "Port me!"
    4363 # endif
    4364     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    4365     return off;
    4366 }
    4367 #endif /* VBOX_STRICT */
    4368 
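/*
 * For reference, the strict-build check above emits approximately this
 * (paraphrasing the instruction bytes/encodings in the emitter):
 *
 *      AMD64:  rol reg,32 ; test reg32,0xffffffff ; jz +1 ; int3 ; rol reg,32
 *      ARM64:  lsr tmp0, reg, #32 ; cbz tmp0, +2 ; brk #0x1100
 *
 * i.e. execution hits the trap (int3 / brk) whenever the upper 32 bits of the
 * register are not all zero.
 */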
    4369 
    4370 #ifdef VBOX_STRICT
    4371 /**
    4372  * Emits code that checks that the content of register @a idxReg is the same
    4373  * as what's in the guest register @a enmGstReg, resulting in a breakpoint
    4374  * instruction if that's not the case.
    4375  *
    4376  * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
    4377  *       Trashes EFLAGS on AMD64.
    4378  */
    4379 static uint32_t
    4380 iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
    4381 {
    4382 # ifdef RT_ARCH_AMD64
    4383     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    4384 
    4385     /* cmp reg, [mem] */
    4386     if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
    4387     {
    4388         if (idxReg >= 8)
    4389             pbCodeBuf[off++] = X86_OP_REX_R;
    4390         pbCodeBuf[off++] = 0x38;
    4391     }
    4392     else
    4393     {
    4394         if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
    4395             pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
    4396         else
    4397         {
    4398             if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
    4399                 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    4400             else
    4401                 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
    4402                            IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
    4403             if (idxReg >= 8)
    4404                 pbCodeBuf[off++] = X86_OP_REX_R;
    4405         }
    4406         pbCodeBuf[off++] = 0x39;
    4407     }
    4408     off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
    4409 
    4410     /* je/jz +1 */
    4411     pbCodeBuf[off++] = 0x74;
    4412     pbCodeBuf[off++] = 0x01;
    4413 
    4414     /* int3 */
    4415     pbCodeBuf[off++] = 0xcc;
    4416 
    4417     /* For values smaller than the register size, we must check that the rest
    4418        of the register is all zeros. */
    4419     if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
    4420     {
    4421         /* test reg64, imm32 */
    4422         pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
    4423         pbCodeBuf[off++] = 0xf7;
    4424         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    4425         pbCodeBuf[off++] = 0;
    4426         pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
    4427         pbCodeBuf[off++] = 0xff;
    4428         pbCodeBuf[off++] = 0xff;
    4429 
    4430         /* je/jz +1 */
    4431         pbCodeBuf[off++] = 0x74;
    4432         pbCodeBuf[off++] = 0x01;
    4433 
    4434         /* int3 */
    4435         pbCodeBuf[off++] = 0xcc;
    4436         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    4437     }
    4438     else
    4439     {
    4440         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    4441         if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
    4442             iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
    4443     }
    4444 
    4445 # elif defined(RT_ARCH_ARM64)
    4446     /* mov TMP0, [gstreg] */
    4447     off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
    4448 
    4449     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    4450     /* sub tmp0, tmp0, idxReg */
    4451     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
    4452     /* cbz tmp0, +1 */
    4453     pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    4454     /* brk #0x1000+enmGstReg */
    4455     pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
    4456     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    4457 
    4458 # else
    4459 #  error "Port me!"
    4460 # endif
    4461     return off;
    4462 }
    4463 #endif /* VBOX_STRICT */
    4464 
    4465 
    4466 #ifdef VBOX_STRICT
    4467 /**
    4468  * Emits code that checks that IEMCPU::fExec matches @a fExec for all
    4469  * important bits.
    4470  *
    4471  * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
    4472  *       Trashes EFLAGS on AMD64.
    4473  */
    4474 static uint32_t
    4475 iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
    4476 {
    4477     uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
    4478     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
    4479     off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
    4480     off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
    4481 
    4482 # ifdef RT_ARCH_AMD64
    4483     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    4484 
    4485     /* je/jz +1 */
    4486     pbCodeBuf[off++] = 0x74;
    4487     pbCodeBuf[off++] = 0x01;
    4488 
    4489     /* int3 */
    4490     pbCodeBuf[off++] = 0xcc;
    4491 
    4492 # elif defined(RT_ARCH_ARM64)
    4493     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    4494 
    4495     /* b.eq +1 */
    4496     pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
    4497     /* brk #0x2000 */
    4498     pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
    4499 
    4500 # else
    4501 #  error "Port me!"
    4502 # endif
    4503     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    4504 
    4505     iemNativeRegFreeTmp(pReNative, idxRegTmp);
    4506     return off;
    4507 }
    4508 #endif /* VBOX_STRICT */
    4509 
    4510 
    4511 /**
    4512  * Emits code for checking the return code of a call and rcPassUp, returning
    4513  * from the code if either is non-zero.
    4514  */
    4515 DECL_HIDDEN_THROW(uint32_t)
    4516 iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
    4517 {
    4518 #ifdef RT_ARCH_AMD64
    4519     /*
    4520      * AMD64: eax = call status code.
    4521      */
    4522 
    4523     /* edx = rcPassUp */
    4524     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
    4525 # ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    4526     off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
    4527 # endif
    4528 
    4529     /* edx = eax | rcPassUp */
    4530     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    4531     pbCodeBuf[off++] = 0x0b;                    /* or edx, eax */
    4532     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
    4533     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    4534 
    4535     /* Jump to non-zero status return path. */
    4536     off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
    4537 
    4538     /* done. */
    4539 
    4540 #elif defined(RT_ARCH_ARM64)
    4541     /*
    4542      * ARM64: w0 = call status code.
    4543      */
    4544 # ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    4545     off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
    4546 # endif
    4547     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
    4548 
    4549     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    4550 
    4551     pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
    4552 
    4553     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
    4554     iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
    4555     pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
    4556 
    4557 #else
    4558 # error "port me"
    4559 #endif
    4560     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    4561     return off;
    4562 }
    4563 
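/*
 * Usage sketch (hypothetical; idxInstr and the argument setup are placeholders,
 * and the actual call emission is elided):
 *
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 1);
 *      // ... load IEMNATIVE_CALL_ARG0_GREG (pVCpu) and emit the call ...
 *      iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
 *      off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
 */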
    4564 
    4565 /**
    4566  * Emits code to check if the content of @a idxAddrReg is a canonical address,
    4567  * raising a \#GP(0) if it isn't.
    4568  *
    4569  * @returns New code buffer offset, UINT32_MAX on failure.
    4570  * @param   pReNative       The native recompile state.
    4571  * @param   off             The code buffer offset.
    4572  * @param   idxAddrReg      The host register with the address to check.
    4573  * @param   idxInstr        The current instruction.
    4574  */
    4575 DECL_HIDDEN_THROW(uint32_t)
    4576 iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
    4577 {
    4578     /*
    4579      * Make sure we don't have any outstanding guest register writes as we may
    4580      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
    4581      */
    4582     off = iemNativeRegFlushPendingWrites(pReNative, off);
    4583 
    4584 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    4585     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    4586 #else
    4587     RT_NOREF(idxInstr);
    4588 #endif
    4589 
    4590 #ifdef RT_ARCH_AMD64
    4591     /*
    4592      * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
    4593      *     return raisexcpt();
     4594      * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
    4595      */
    4596     uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    4597 
    4598     off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
    4599     off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
    4600     off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
    4601     off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
    4602     off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    4603 
    4604     iemNativeRegFreeTmp(pReNative, iTmpReg);
    4605 
    4606 #elif defined(RT_ARCH_ARM64)
    4607     /*
    4608      * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
    4609      *     return raisexcpt();
    4610      * ----
    4611      *     mov     x1, 0x800000000000
    4612      *     add     x1, x0, x1
    4613      *     cmp     xzr, x1, lsr 48
    4614      *     b.ne    .Lraisexcpt
    4615      */
    4616     uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    4617 
    4618     off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
    4619     off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
     4620     off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
    4621     off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    4622 
    4623     iemNativeRegFreeTmp(pReNative, iTmpReg);
    4624 
    4625 #else
    4626 # error "Port me"
    4627 #endif
    4628     return off;
    4629 }
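
Both arms implement the same 48-bit canonical-address predicate, just with different immediate strategies. The following self-contained sketch (hypothetical function names, assuming 48-bit linear addressing) shows the two formulations and a few worked values:

#include <assert.h>
#include <stdint.h>

/* AMD64 variant: works on the upper 32 bits only, avoiding a 64-bit immediate. */
static int iemNativeSketchIsNonCanonical32(uint64_t uAddr)
{
    return (uint32_t)((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16 != 0;
}

/* ARM64 variant: adds a 64-bit bias and tests the top 16 bits of the sum. */
static int iemNativeSketchIsNonCanonical64(uint64_t uAddr)
{
    return (uAddr + UINT64_C(0x800000000000)) >> 48 != 0;
}

int main(void)
{
    assert(!iemNativeSketchIsNonCanonical32(UINT64_C(0x00007fffffffffff)));   /* highest canonical low-half address  */
    assert(!iemNativeSketchIsNonCanonical64(UINT64_C(0xffff800000000000)));   /* lowest canonical high-half address  */
    assert( iemNativeSketchIsNonCanonical32(UINT64_C(0x0000800000000000)));   /* first non-canonical address         */
    assert( iemNativeSketchIsNonCanonical64(UINT64_C(0x0000800000000000)));
    return 0;
}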
    4630 
    4631 
    4632 /**
    4633  * Emits code to check if the content of @a idxAddrReg is within the limit of
    4634  * idxSegReg, raising a \#GP(0) if it isn't.
    4635  *
    4636  * @returns New code buffer offset; throws VBox status code on error.
    4637  * @param   pReNative       The native recompile state.
    4638  * @param   off             The code buffer offset.
    4639  * @param   idxAddrReg      The host register (32-bit) with the address to
    4640  *                          check.
    4641  * @param   idxSegReg       The segment register (X86_SREG_XXX) to check
    4642  *                          against.
    4643  * @param   idxInstr        The current instruction.
    4644  */
    4645 DECL_HIDDEN_THROW(uint32_t)
    4646 iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    4647                                                     uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
    4648 {
    4649     /*
    4650      * Make sure we don't have any outstanding guest register writes as we may
     4651      * raise a #GP(0), and all guest registers must be up to date in CPUMCTX.
    4652      */
    4653     off = iemNativeRegFlushPendingWrites(pReNative, off);
    4654 
    4655 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    4656     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    4657 #else
    4658     RT_NOREF(idxInstr);
    4659 #endif
    4660 
    4661     /** @todo implement expand down/whatnot checking */
    4662     AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
    4663 
    4664     uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
    4665                                                                (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
    4666                                                                kIemNativeGstRegUse_ForUpdate);
    4667 
    4668     off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
    4669     off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    4670 
    4671     iemNativeRegFreeTmp(pReNative, iTmpLimReg);
    4672     return off;
    4673 }
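
As the AssertStmt above notes, only plain expand-up CS is handled so far, so the emitted compare-and-jump pair is just the interpreter's unsigned limit test. A tiny sketch of the predicate (hypothetical name):

#include <stdint.h>

/* Sketch: "cmp addr32, limit" + "ja RaiseGp0" faults exactly when the address
   is strictly above the segment limit (expand-up segments only). */
static int iemNativeSketchExceedsSegLimit(uint32_t uAddr, uint32_t cbLimit)
{
    return uAddr > cbLimit;
}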
    4674 
    4675 
    4676 /**
    4677  * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
    4678  *
    4679  * @returns The flush mask.
    4680  * @param   fCImpl          The IEM_CIMPL_F_XXX flags.
    4681  * @param   fGstShwFlush    The starting flush mask.
    4682  */
    4683 DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
    4684 {
    4685     if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
    4686         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_CS)
    4687                      |  RT_BIT_64(kIemNativeGstReg_SegBaseFirst  + X86_SREG_CS)
    4688                      |  RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
    4689     if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
    4690         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
    4691                      |  RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_SS)
    4692                      |  RT_BIT_64(kIemNativeGstReg_SegBaseFirst  + X86_SREG_SS)
    4693                      |  RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
    4694     else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
    4695         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
    4696     if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
    4697         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
    4698     return fGstShwFlush;
    4699 }
    4700 
    4701 
    4702 /**
    4703  * Emits a call to a CImpl function or something similar.
    4704  */
    4705 static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush,
    4706                                       uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
    4707                                       uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
    4708 {
    4709     /*
     4710      * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
    4711      * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
    4712      */
    4713     fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
    4714                                                              fGstShwFlush
    4715                                                              | RT_BIT_64(kIemNativeGstReg_Pc)
    4716                                                              | RT_BIT_64(kIemNativeGstReg_EFlags));
    4717     iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
    4718 
    4719     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
    4720 
    4721     /*
    4722      * Load the parameters.
    4723      */
    4724 #if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
     4725     /* Special-case the hidden VBOXSTRICTRC pointer. */
    4726     off = iemNativeEmitLoadGprFromGpr(  pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    4727     off = iemNativeEmitLoadGprImm64(    pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
    4728     if (cAddParams > 0)
    4729         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
    4730     if (cAddParams > 1)
    4731         off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
    4732     if (cAddParams > 2)
    4733         off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
    4734     off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
    4735 
    4736 #else
    4737     AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
    4738     off = iemNativeEmitLoadGprFromGpr(  pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    4739     off = iemNativeEmitLoadGprImm64(    pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
    4740     if (cAddParams > 0)
    4741         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
    4742     if (cAddParams > 1)
    4743         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
    4744     if (cAddParams > 2)
    4745 # if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
    4746         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
    4747 # else
    4748         off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
    4749 # endif
    4750 #endif
    4751 
    4752     /*
    4753      * Make the call.
    4754      */
    4755     off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
    4756 
    4757 #if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
    4758     off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
    4759 #endif
    4760 
    4761     /*
    4762      * Check the status code.
    4763      */
    4764     return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
    4765 }
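
Leaving the Windows/VBOXSTRICTRC special case aside, the parameter loads above materialise a call of roughly the shape below (a sketch only; the real pfnCImpl prototypes vary with cAddParams and are declared elsewhere in IEM):

#include <stdint.h>

struct VMCPUCC; /* opaque for the purpose of this sketch */

/* Hypothetical shape of a CImpl call with three extra parameters, matching the
   register assignments above: arg0 = pVCpu, arg1 = cbInstr, then uParam0..2.
   With VBOXSTRICTRC_STRICT_ENABLED on Windows a hidden pointer to the return
   slot is passed first and the status is reloaded from the shadow area. */
typedef int32_t (*PFNIEMCIMPL3SKETCH)(struct VMCPUCC *pVCpu, uint8_t cbInstr,
                                      uint64_t uParam0, uint64_t uParam1, uint64_t uParam2);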
    4766 
    4767 
    4768 /**
    4769  * Emits a call to a threaded worker function.
    4770  */
    4771 static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
    4772 {
    4773     iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
    4774     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
    4775 
    4776 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    4777     /* The threaded function may throw / long jmp, so set current instruction
    4778        number if we're counting. */
    4779     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    4780 #endif
    4781 
    4782     uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
    4783 
    4784 #ifdef RT_ARCH_AMD64
    4785     /* Load the parameters and emit the call. */
    4786 # ifdef RT_OS_WINDOWS
    4787 #  ifndef VBOXSTRICTRC_STRICT_ENABLED
    4788     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
    4789     if (cParams > 0)
    4790         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
    4791     if (cParams > 1)
    4792         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
    4793     if (cParams > 2)
    4794         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
    4795 #  else  /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
    4796     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
    4797     if (cParams > 0)
    4798         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
    4799     if (cParams > 1)
    4800         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
    4801     if (cParams > 2)
    4802     {
    4803         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
    4804         off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
    4805     }
    4806     off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
    4807 #  endif /* VBOXSTRICTRC_STRICT_ENABLED */
    4808 # else
    4809     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
    4810     if (cParams > 0)
    4811         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
    4812     if (cParams > 1)
    4813         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
    4814     if (cParams > 2)
    4815         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
    4816 # endif
    4817 
    4818     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
    4819 
    4820 # if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
    4821     off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
    4822 # endif
    4823 
    4824 #elif RT_ARCH_ARM64
    4825     /*
     4826      * ARM64: Load the parameters and emit the call.
    4827      */
    4828     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    4829     if (cParams > 0)
    4830         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
    4831     if (cParams > 1)
    4832         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
    4833     if (cParams > 2)
    4834         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
    4835 
    4836     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
    4837 
    4838 #else
    4839 # error "port me"
    4840 #endif
    4841 
    4842     /*
    4843      * Check the status code.
    4844      */
    4845     off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
    4846 
    4847     return off;
    4848 }
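
Put differently, the generated code performs the equivalent of the following direct dispatch (sketch only; the real table entry and function types live in the threaded recompiler headers):

#include <stdint.h>

struct VMCPUCC; /* opaque here */
typedef int32_t (*PFNIEMTHREADEDFUNCSKETCH)(struct VMCPUCC *pVCpu,
                                            uint64_t uParam0, uint64_t uParam1, uint64_t uParam2);

/* Sketch of the dispatch emitted for one call entry; parameters beyond the
   count in g_acIemThreadedFunctionUsedArgs are simply never loaded. */
static int32_t iemNativeSketchThreadedDispatch(PFNIEMTHREADEDFUNCSKETCH pfn, struct VMCPUCC *pVCpu,
                                               uint64_t const auParams[3])
{
    return pfn(pVCpu, auParams[0], auParams[1], auParams[2]);
}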
    4849 
    4850 
    4851 /**
    4852  * Emits the code at the NeedCsLimChecking label.
    4853  */
    4854 static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    4855 {
    4856     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
    4857     if (idxLabel != UINT32_MAX)
    4858     {
    4859         iemNativeLabelDefine(pReNative, idxLabel, off);
    4860 
    4861         /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
    4862         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    4863         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
    4864 
    4865         /* jump back to the return sequence. */
    4866         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    4867     }
    4868     return off;
    4869 }
    4870 
    4871 
    4872 /**
    4873  * Emits the code at the ObsoleteTb label.
    4874  */
    4875 static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    4876 {
    4877     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
    4878     if (idxLabel != UINT32_MAX)
    4879     {
    4880         iemNativeLabelDefine(pReNative, idxLabel, off);
    4881 
    4882         /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
    4883         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    4884         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
    4885 
    4886         /* jump back to the return sequence. */
    4887         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    4888     }
    4889     return off;
    4890 }
    4891 
    4892 
    4893 /**
    4894  * Emits the code at the RaiseGP0 label.
    4895  */
    4896 static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    4897 {
    4898     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
    4899     if (idxLabel != UINT32_MAX)
    4900     {
    4901         iemNativeLabelDefine(pReNative, idxLabel, off);
    4902 
    4903         /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
    4904         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    4905         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
    4906 
    4907         /* jump back to the return sequence. */
    4908         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    4909     }
    4910     return off;
    4911 }
    4912 
    4913 
    4914 /**
    4915  * Emits the code at the ReturnWithFlags label (returns
    4916  * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
    4917  */
    4918 static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    4919 {
    4920     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
    4921     if (idxLabel != UINT32_MAX)
    4922     {
    4923         iemNativeLabelDefine(pReNative, idxLabel, off);
    4924 
    4925         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
    4926 
    4927         /* jump back to the return sequence. */
    4928         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    4929     }
    4930     return off;
    4931 }
    4932 
    4933 
    4934 /**
    4935  * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
    4936  */
    4937 static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    4938 {
    4939     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
    4940     if (idxLabel != UINT32_MAX)
    4941     {
    4942         iemNativeLabelDefine(pReNative, idxLabel, off);
    4943 
    4944         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
    4945 
    4946         /* jump back to the return sequence. */
    4947         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    4948     }
    4949     return off;
    4950 }
    4951 
    4952 
    4953 /**
    4954  * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
    4955  */
    4956 static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    4957 {
    4958     /*
    4959      * Generate the rc + rcPassUp fiddling code if needed.
    4960      */
    4961     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
    4962     if (idxLabel != UINT32_MAX)
    4963     {
    4964         iemNativeLabelDefine(pReNative, idxLabel, off);
    4965 
    4966         /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
    4967 #ifdef RT_ARCH_AMD64
    4968 # ifdef RT_OS_WINDOWS
    4969 #  ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    4970         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8,  X86_GREG_xCX); /* cl = instruction number */
    4971 #  endif
    4972         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
    4973         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
    4974 # else
    4975         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
    4976         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
    4977 #  ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    4978         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
    4979 #  endif
    4980 # endif
    4981 # ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    4982         off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
    4983 # endif
    4984 
    4985 #else
    4986         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
    4987         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    4988         /* IEMNATIVE_CALL_ARG2_GREG is already set. */
    4989 #endif
    4990 
    4991         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
    4992         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    4993     }
    4994     return off;
    4995 }
    4996 
    4997 
    4998 /**
    4999  * Emits a standard epilog.
    5000  */
    5001 static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
    5002 {
    5003     *pidxReturnLabel = UINT32_MAX;
    5004 
    5005     /*
    5006      * Successful return, so clear the return register (eax, w0).
    5007      */
     5008     off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
    5009 
    5010     /*
    5011      * Define label for common return point.
    5012      */
    5013     uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
    5014     *pidxReturnLabel = idxReturn;
    5015 
    5016     /*
    5017      * Restore registers and return.
    5018      */
    5019 #ifdef RT_ARCH_AMD64
    5020     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
    5021 
     5022     /* Reposition rsp at the r15 restore point. */
    5023     pbCodeBuf[off++] = X86_OP_REX_W;
    5024     pbCodeBuf[off++] = 0x8d;                    /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
    5025     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
    5026     pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
    5027 
    5028     /* Pop non-volatile registers and return */
    5029     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r15 */
    5030     pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
    5031     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r14 */
    5032     pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
    5033     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r13 */
    5034     pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
    5035     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r12 */
    5036     pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
    5037 # ifdef RT_OS_WINDOWS
    5038     pbCodeBuf[off++] = 0x58 + X86_GREG_xDI;     /* pop rdi */
    5039     pbCodeBuf[off++] = 0x58 + X86_GREG_xSI;     /* pop rsi */
    5040 # endif
    5041     pbCodeBuf[off++] = 0x58 + X86_GREG_xBX;     /* pop rbx */
    5042     pbCodeBuf[off++] = 0xc9;                    /* leave */
    5043     pbCodeBuf[off++] = 0xc3;                    /* ret */
    5044     pbCodeBuf[off++] = 0xcc;                    /* int3 poison */
    5045 
    5046 #elif RT_ARCH_ARM64
    5047     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    5048 
     5049     /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
    5050     AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
    5051     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
    5052                                                  ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
    5053                                                  IEMNATIVE_FRAME_VAR_SIZE / 8);
    5054     /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
    5055     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5056                                                  ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
    5057     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5058                                                  ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
    5059     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5060                                                  ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
    5061     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5062                                                  ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
    5063     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5064                                                  ARMV8_A64_REG_BP,  ARMV8_A64_REG_LR,  ARMV8_A64_REG_SP, 10);
    5065     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    5066 
    5067     /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ;  */
    5068     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
    5069     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
    5070                                                      IEMNATIVE_FRAME_SAVE_REG_SIZE);
    5071 
    5072     /* retab / ret */
    5073 # ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
    5074     if (1)
    5075         pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
    5076     else
    5077 # endif
    5078         pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
    5079 
    5080 #else
    5081 # error "port me"
    5082 #endif
    5083     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    5084 
    5085     return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
    5086 }
    5087 
    5088 
    5089 /**
    5090  * Emits a standard prolog.
    5091  */
    5092 static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    5093 {
    5094 #ifdef RT_ARCH_AMD64
    5095     /*
    5096      * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
    5097      * reserving 64 bytes for stack variables plus 4 non-register argument
     5098      * slots.  Fixed register assignment: xBX = pVCpu;
    5099      *
    5100      * Since we always do the same register spilling, we can use the same
    5101      * unwind description for all the code.
    5102      */
    5103     uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    5104     pbCodeBuf[off++] = 0x50 + X86_GREG_xBP;     /* push rbp */
    5105     pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbp, rsp */
    5106     pbCodeBuf[off++] = 0x8b;
    5107     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
    5108     pbCodeBuf[off++] = 0x50 + X86_GREG_xBX;     /* push rbx */
    5109     AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
    5110 # ifdef RT_OS_WINDOWS
    5111     pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbx, rcx ; RBX = pVCpu */
    5112     pbCodeBuf[off++] = 0x8b;
    5113     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
    5114     pbCodeBuf[off++] = 0x50 + X86_GREG_xSI;     /* push rsi */
    5115     pbCodeBuf[off++] = 0x50 + X86_GREG_xDI;     /* push rdi */
    5116 # else
    5117     pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbx, rdi ; RBX = pVCpu */
    5118     pbCodeBuf[off++] = 0x8b;
    5119     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
    5120 # endif
    5121     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r12 */
    5122     pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
    5123     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r13 */
    5124     pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
    5125     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r14 */
    5126     pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
    5127     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r15 */
    5128     pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
    5129 
    5130     off = iemNativeEmitSubGprImm(pReNative, off,    /* sub rsp, byte 28h */
    5131                                  X86_GREG_xSP,
    5132                                    IEMNATIVE_FRAME_ALIGN_SIZE
    5133                                  + IEMNATIVE_FRAME_VAR_SIZE
    5134                                  + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
    5135                                  + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
    5136     AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
    5137     AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
    5138     AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
    5139 
    5140 #elif RT_ARCH_ARM64
    5141     /*
    5142      * We set up a stack frame exactly like on x86, only we have to push the
     5143      * return address ourselves here.  We save all non-volatile registers.
    5144      */
    5145     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    5146 
     5147 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement of libunwind for JIT FDEs. Investigate further, as we've been unable
     5148                       * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.  It's
     5149                       * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
     5150                       * in any way conditional, so just emit this instruction now and hope for the best... */
    5151     /* pacibsp */
    5152     pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
    5153 # endif
    5154 
    5155     /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
    5156     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
    5157     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
    5158                                                  ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
    5159                                                  -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
    5160     /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
    5161     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5162                                                  ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
    5163     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5164                                                  ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
    5165     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5166                                                  ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
    5167     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5168                                                  ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
    5169     /* Save the BP and LR (ret address) registers at the top of the frame. */
    5170     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5171                                                  ARMV8_A64_REG_BP,  ARMV8_A64_REG_LR,  ARMV8_A64_REG_SP, 10);
    5172     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    5173     /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
    5174     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
    5175                                                      ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
    5176 
    5177     /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ;  Allocate the variable area from SP. */
    5178     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
    5179 
    5180     /* mov r28, r0  */
    5181     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
    5182     /* mov r27, r1  */
    5183     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
    5184 
    5185 #else
    5186 # error "port me"
    5187 #endif
    5188     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    5189     return off;
    5190 }
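
For orientation, the frame the AMD64 prolog above builds (and which the epilog earlier in this file unwinds via the r15 restore point) looks roughly as follows. The sketch assumes the Windows host variant; on other hosts rsi/rdi are not saved and the last push lands at rbp-0x28:

/* Hedged sketch of the AMD64 (Windows host) stack frame:
 *
 *     rbp+0x08   return address
 *     rbp+0x00   saved rbp                                  <- rbp
 *     rbp-0x08   saved rbx (holds pVCpu from here on)
 *     rbp-0x10   saved rsi
 *     rbp-0x18   saved rdi
 *     rbp-0x20   saved r12
 *     rbp-0x28   saved r13
 *     rbp-0x30   saved r14
 *     rbp-0x38   saved r15 (the epilog's "lea rsp" target)
 *     below      alignment + variable area + stack args + shadow args   <- rsp
 */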
    5191 
    5192 
    5193 
    5194 
    5195 /*********************************************************************************************************************************
    5196 *   Emitters for IEM_MC_BEGIN and IEM_MC_END.                                                                                    *
    5197 *********************************************************************************************************************************/
    5198 
    5199 #define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
    5200     { \
    5201         Assert(pReNative->Core.bmVars     == 0); \
    5202         Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
    5203         Assert(pReNative->Core.bmStack    == 0); \
    5204         pReNative->fMc    = (a_fMcFlags); \
    5205         pReNative->fCImpl = (a_fCImplFlags); \
    5206         pReNative->cArgs  = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
    5207 
    5208 /** We have to get to the end in recompilation mode, as otherwise we won't
    5209  * generate code for all the IEM_MC_IF_XXX branches. */
    5210 #define IEM_MC_END() \
    5211         iemNativeVarFreeAll(pReNative); \
    5212     } return off
    5213 
    5214 
    5215 
    5216 /*********************************************************************************************************************************
    5217 *   Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX)                                               *
    5218 *********************************************************************************************************************************/
    5219 
    5220 #define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
    5221     pReNative->fMc    = 0; \
    5222     pReNative->fCImpl = (a_fFlags); \
    5223     return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
    5224 
    5225 
    5226 #define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
    5227     pReNative->fMc    = 0; \
    5228     pReNative->fCImpl = (a_fFlags); \
    5229     return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
    5230 
    5231 DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    5232                                                     uint8_t idxInstr, uint64_t a_fGstShwFlush,
    5233                                                     uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
    5234 {
    5235     return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
    5236 }
    5237 
    5238 
    5239 #define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
    5240     pReNative->fMc    = 0; \
    5241     pReNative->fCImpl = (a_fFlags); \
    5242     return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
    5243                                    (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
    5244 
    5245 DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    5246                                                     uint8_t idxInstr, uint64_t a_fGstShwFlush,
    5247                                                     uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
    5248 {
    5249     return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
    5250 }
    5251 
    5252 
    5253 #define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
    5254     pReNative->fMc    = 0; \
    5255     pReNative->fCImpl = (a_fFlags); \
    5256     return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
    5257                                    (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
    5258 
    5259 DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    5260                                                     uint8_t idxInstr, uint64_t a_fGstShwFlush,
    5261                                                     uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
    5262                                                     uint64_t uArg2)
    5263 {
    5264     return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
    5265 }
    5266 
    5267 
    5268 
    5269 /*********************************************************************************************************************************
    5270 *   Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX)                                                     *
    5271 *********************************************************************************************************************************/
    5272 
    5273 /** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
    5274  *  and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
    5275 DECL_INLINE_THROW(uint32_t)
    5276 iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    5277 {
    5278     /*
     5279      * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
     5280      * return with a special status code and make the execution loop deal with
    5281      * this.  If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
    5282      * exception and won't continue execution.  While CPUMCTX_DBG_DBGF_MASK
    5283      * could continue w/o interruption, it probably will drop into the
     5284      * debugger, so it's not worth the effort of trying to service it here; we
    5285      * just lump it in with the handling of the others.
    5286      *
    5287      * To simplify the code and the register state management even more (wrt
     5288      * the immediate in the AND operation), we always update the flags and skip
     5289      * the extra check and its associated conditional jump.
    5290      */
    5291     AssertCompile(   (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
    5292                   <= UINT32_MAX);
    5293     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    5294                                                              kIemNativeGstRegUse_ForUpdate);
    5295     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
    5296                                                              X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
    5297                                                              iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
    5298     off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
    5299     off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
    5300 
    5301     /* Free but don't flush the EFLAGS register. */
    5302     iemNativeRegFreeTmp(pReNative, idxEflReg);
    5303 
    5304     return off;
    5305 }
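
Stated as plain C, the emitted fast path above is equivalent to the following check (sketch only; the masks are the real VBox constants, the helper itself is hypothetical):

#include <stdint.h>

/* Sketch: return non-zero to take the ReturnWithFlags exit
   (VINF_IEM_REEXEC_FINISH_WITH_FLAGS); otherwise clear RF and the interrupt
   shadow bits in place and let the translation block continue. */
static int iemNativeSketchFinishInstrFlagsCheck(uint32_t *pfEFlags,
                                                uint32_t fExitMask,  /* X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK */
                                                uint32_t fClearMask) /* X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW */
{
    if (*pfEFlags & fExitMask)
        return 1;
    *pfEFlags &= ~fClearMask;
    return 0;
}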
    5306 
    5307 
    5308 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
    5309     off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
    5310 
    5311 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
    5312     IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
    5313     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5314 
    5315 /** Same as iemRegAddToRip64AndFinishingNoFlags. */
    5316 DECL_INLINE_THROW(uint32_t)
    5317 iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
    5318 {
    5319     /* Allocate a temporary PC register. */
    5320     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    5321 
    5322     /* Perform the addition and store the result. */
    5323     off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
    5324     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    5325 
    5326     /* Free but don't flush the PC register. */
    5327     iemNativeRegFreeTmp(pReNative, idxPcReg);
    5328 
    5329     return off;
    5330 }
    5331 
    5332 
    5333 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
    5334     off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
    5335 
    5336 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
    5337     IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
    5338     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5339 
    5340 /** Same as iemRegAddToEip32AndFinishingNoFlags. */
    5341 DECL_INLINE_THROW(uint32_t)
    5342 iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
    5343 {
    5344     /* Allocate a temporary PC register. */
    5345     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    5346 
    5347     /* Perform the addition and store the result. */
    5348     off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
    5349     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    5350 
    5351     /* Free but don't flush the PC register. */
    5352     iemNativeRegFreeTmp(pReNative, idxPcReg);
    5353 
    5354     return off;
    5355 }
    5356 
    5357 
    5358 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
    5359     off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
    5360 
    5361 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
    5362     IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
    5363     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5364 
    5365 /** Same as iemRegAddToIp16AndFinishingNoFlags. */
    5366 DECL_INLINE_THROW(uint32_t)
    5367 iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
    5368 {
    5369     /* Allocate a temporary PC register. */
    5370     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    5371 
    5372     /* Perform the addition and store the result. */
    5373     off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
    5374     off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    5375     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    5376 
    5377     /* Free but don't flush the PC register. */
    5378     iemNativeRegFreeTmp(pReNative, idxPcReg);
    5379 
    5380     return off;
    5381 }
    5382 
    5383 
    5384 
    5385 /*********************************************************************************************************************************
    5386 *   Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX).                                *
    5387 *********************************************************************************************************************************/
    5388 
    5389 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
    5390     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
    5391                                                             (a_enmEffOpSize), pCallEntry->idxInstr)
    5392 
    5393 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
    5394     IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
    5395     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5396 
    5397 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
    5398     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
    5399                                                             IEMMODE_16BIT, pCallEntry->idxInstr)
    5400 
    5401 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
    5402     IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
    5403     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5404 
    5405 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
    5406     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
    5407                                                             IEMMODE_64BIT, pCallEntry->idxInstr)
    5408 
    5409 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
    5410     IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
    5411     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5412 
    5413 /** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
    5414  *  iemRegRip64RelativeJumpS16AndFinishNoFlags and
    5415  *  iemRegRip64RelativeJumpS32AndFinishNoFlags. */
    5416 DECL_INLINE_THROW(uint32_t)
    5417 iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
    5418                                                   int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
    5419 {
    5420     Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
    5421 
    5422     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    5423     off = iemNativeRegFlushPendingWrites(pReNative, off);
    5424 
    5425     /* Allocate a temporary PC register. */
    5426     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    5427 
    5428     /* Perform the addition. */
    5429     off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
    5430 
    5431     if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
    5432     {
    5433         /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
    5434         off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
    5435     }
    5436     else
    5437     {
    5438         /* Just truncate the result to 16-bit IP. */
    5439         Assert(enmEffOpSize == IEMMODE_16BIT);
    5440         off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    5441     }
    5442     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    5443 
    5444     /* Free but don't flush the PC register. */
    5445     iemNativeRegFreeTmp(pReNative, idxPcReg);
    5446 
    5447     return off;
    5448 }
    5449 
    5450 
    5451 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
    5452     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
    5453                                                             (a_enmEffOpSize), pCallEntry->idxInstr)
    5454 
    5455 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
    5456     IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
    5457     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5458 
    5459 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
    5460     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
    5461                                                             IEMMODE_16BIT, pCallEntry->idxInstr)
    5462 
    5463 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
    5464     IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
    5465     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5466 
    5467 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
    5468     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
    5469                                                             IEMMODE_32BIT, pCallEntry->idxInstr)
    5470 
    5471 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
    5472     IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
    5473     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5474 
    5475 /** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
    5476  *  iemRegEip32RelativeJumpS16AndFinishNoFlags and
    5477  *  iemRegEip32RelativeJumpS32AndFinishNoFlags. */
    5478 DECL_INLINE_THROW(uint32_t)
    5479 iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
    5480                                                   int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
    5481 {
    5482     Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
    5483 
    5484     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    5485     off = iemNativeRegFlushPendingWrites(pReNative, off);
    5486 
    5487     /* Allocate a temporary PC register. */
    5488     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    5489 
    5490     /* Perform the addition. */
    5491     off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
    5492 
    5493     /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
    5494     if (enmEffOpSize == IEMMODE_16BIT)
    5495         off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    5496 
    5497     /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
    5498     off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
    5499 
    5500     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    5501 
    5502     /* Free but don't flush the PC register. */
    5503     iemNativeRegFreeTmp(pReNative, idxPcReg);
    5504 
    5505     return off;
    5506 }
    5507 
    5508 
    5509 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
    5510     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
    5511 
    5512 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
    5513     IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
    5514     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5515 
    5516 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
    5517     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
    5518 
    5519 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
    5520     IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
    5521     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5522 
    5523 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
    5524     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
    5525 
    5526 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
    5527     IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
    5528     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5529 
    5530 /** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
    5531 DECL_INLINE_THROW(uint32_t)
    5532 iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    5533                                                  uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
    5534 {
    5535     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    5536     off = iemNativeRegFlushPendingWrites(pReNative, off);
    5537 
    5538     /* Allocate a temporary PC register. */
    5539     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    5540 
    5541     /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
    5542     off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
    5543     off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    5544     off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
    5545     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    5546 
    5547     /* Free but don't flush the PC register. */
    5548     iemNativeRegFreeTmp(pReNative, idxPcReg);
    5549 
    5550     return off;
    5551 }
    5552 
    5553 
    5554 
    5555 /*********************************************************************************************************************************
     5556 *   Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                   *
    5557 *********************************************************************************************************************************/
    5558 
    5559 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
    5560 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
    5561     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
    5562 
    5563 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
    5564 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
    5565     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
    5566 
    5567 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
    5568 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
    5569     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP),  true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
    5570 
    5571 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
    5572  *  clears flags. */
    5573 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
    5574     IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
    5575     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5576 
    5577 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
    5578  *  clears flags. */
    5579 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
    5580     IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
    5581     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5582 
    5583 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
    5584  *  clears flags. */
    5585 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
    5586     IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
    5587     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5588 
    5589 #undef IEM_MC_SET_RIP_U16_AND_FINISH
    5590 
    5591 
    5592 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
    5593 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
    5594     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
    5595 
    5596 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
    5597 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
    5598     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP),  true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
    5599 
    5600 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
    5601  *  clears flags. */
    5602 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
    5603     IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
    5604     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5605 
    5606 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
    5607  *  and clears flags. */
    5608 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
    5609     IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
    5610     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5611 
    5612 #undef IEM_MC_SET_RIP_U32_AND_FINISH
    5613 
    5614 
    5615 /** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
    5616 #define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
    5617     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP),  true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
    5618 
    5619 /** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
    5620  *  and clears flags. */
    5621 #define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
    5622     IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
    5623     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5624 
    5625 #undef IEM_MC_SET_RIP_U64_AND_FINISH
    5626 
    5627 
    5628 /** Same as iemRegRipJumpU16AndFinishNoFlags,
    5629  *  iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
    5630 DECL_INLINE_THROW(uint32_t)
    5631 iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
    5632                             uint8_t idxInstr, uint8_t cbVar)
    5633 {
    5634     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
    5635     Assert(pReNative->Core.aVars[idxVarPc].cbVar == cbVar);
    5636 
    5637     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    5638     off = iemNativeRegFlushPendingWrites(pReNative, off);
    5639 
    5640     /* Get a register with the new PC loaded from idxVarPc.
    5641        Note! This ASSUMES that the high bits of the GPR is zeroed. */
     5642        Note! This ASSUMES that the high bits of the GPR are zeroed. */
    5643 
    5644     /* Check limit (may #GP(0) + exit TB). */
    5645     if (!f64Bit)
    5646         off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
    5647     /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
    5648     else if (cbVar > sizeof(uint32_t))
    5649         off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
    5650 
    5651     /* Store the result. */
    5652     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    5653 
     5654     /** @todo implicitly free the variable? */
    5655 
    5656     return off;
    5657 }
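
/* Note on the checks above: 16-bit and 32-bit targets are checked against the
   CS limit, while a full 64-bit target gets the canonical check; a 16/32-bit
   value in 64-bit code needs no check at all, presumably because a
   zero-extended 32-bit address always falls within the canonical range. */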
    5658 
    5659 
    5660 
    5661 /*********************************************************************************************************************************
    5662 *   Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF)                                                         *
    5663 *********************************************************************************************************************************/
    5664 
    5665 /**
    5666  * Pushes an IEM_MC_IF_XXX onto the condition stack.
    5667  *
     5668  * @returns Pointer to the condition stack entry.
     5669  * @throws  VERR_IEM_COND_TOO_DEEPLY_NESTED if nested too deeply.
    5670  */
    5671 DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
    5672 {
    5673     uint32_t const idxStack = pReNative->cCondDepth;
    5674     AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
    5675 
    5676     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
    5677     pReNative->cCondDepth = (uint8_t)(idxStack + 1);
    5678 
    5679     uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
    5680     pEntry->fInElse       = false;
    5681     pEntry->idxLabelElse  = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
    5682     pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
    5683 
    5684     return pEntry;
    5685 }
    5686 
    5687 
    5688 /**
    5689  * Start of the if-block, snapshotting the register and variable state.
    5690  */
    5691 DECL_INLINE_THROW(void)
    5692 iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
    5693 {
    5694     Assert(offIfBlock != UINT32_MAX);
    5695     Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
    5696     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
    5697     Assert(!pEntry->fInElse);
    5698 
     5699     /* Define the start of the IF block if requested or for disassembly purposes. */
    5700     if (idxLabelIf != UINT32_MAX)
    5701         iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
    5702 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    5703     else
    5704         iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
    5705 #else
    5706     RT_NOREF(offIfBlock);
    5707 #endif
    5708 
    5709     /* Copy the initial state so we can restore it in the 'else' block. */
    5710     pEntry->InitialState = pReNative->Core;
    5711 }
    5712 
    5713 
    5714 #define IEM_MC_ELSE() } while (0); \
    5715         off = iemNativeEmitElse(pReNative, off); \
    5716         do {
    5717 
    5718 /** Emits code related to IEM_MC_ELSE. */
    5719 DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    5720 {
    5721     /* Check sanity and get the conditional stack entry. */
    5722     Assert(off != UINT32_MAX);
    5723     Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
    5724     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
    5725     Assert(!pEntry->fInElse);
    5726 
    5727     /* Jump to the endif */
    5728     off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
    5729 
    5730     /* Define the else label and enter the else part of the condition. */
    5731     iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
    5732     pEntry->fInElse = true;
    5733 
    5734     /* Snapshot the core state so we can do a merge at the endif and restore
    5735        the snapshot we took at the start of the if-block. */
    5736     pEntry->IfFinalState = pReNative->Core;
    5737     pReNative->Core = pEntry->InitialState;
    5738 
    5739     return off;
    5740 }
    5741 
    5742 
    5743 #define IEM_MC_ENDIF() } while (0); \
    5744         off = iemNativeEmitEndIf(pReNative, off)
    5745 
    5746 /** Emits code related to IEM_MC_ENDIF. */
    5747 DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    5748 {
    5749     /* Check sanity and get the conditional stack entry. */
    5750     Assert(off != UINT32_MAX);
    5751     Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
    5752     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
    5753 
    5754     /*
     5755      * Now we have to find common ground between the current core state and the
     5756      * state at the end of the if-block.  Use the smallest common denominator and
     5757      * just drop anything that isn't the same in both states.
    5758      */
    5759     /** @todo We could, maybe, shuffle registers around if we thought it helpful,
    5760      *        which is why we're doing this at the end of the else-block.
     5761      *        But we'd need more info about the future for that to be worth the effort. */
    5762     PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
    5763     if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
    5764     {
    5765         /* shadow guest stuff first. */
    5766         uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
    5767         if (fGstRegs)
    5768         {
    5769             Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
    5770             do
    5771             {
    5772                 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    5773                 fGstRegs &= ~RT_BIT_64(idxGstReg);
    5774 
    5775                 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    5776                 if (  !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
    5777                     || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
    5778                 {
    5779                     Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
    5780                            g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
    5781                     iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
    5782                 }
    5783             } while (fGstRegs);
    5784         }
    5785         else
    5786             Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
    5787 
    5788         /* Check variables next. For now we must require them to be identical
    5789            or stuff we can recreate. */
    5790         Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
    5791         uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
    5792         if (fVars)
    5793         {
    5794             uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
    5795             do
    5796             {
    5797                 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
    5798                 fVars &= ~RT_BIT_32(idxVar);
    5799 
    5800                 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
    5801                 {
    5802                     if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
    5803                         continue;
    5804                     if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
    5805                     {
    5806                         uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    5807                         if (idxHstReg != UINT8_MAX)
    5808                         {
    5809                             pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    5810                             pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    5811                             Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
    5812                                    g_apszIemNativeHstRegNames[idxHstReg], idxVar));
    5813                         }
    5814                         continue;
    5815                     }
    5816                 }
    5817                 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
    5818                     continue;
    5819 
    5820                 /* Irreconcilable, so drop it. */
    5821                 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    5822                 if (idxHstReg != UINT8_MAX)
    5823                 {
    5824                     pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    5825                     pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    5826                     Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
    5827                            g_apszIemNativeHstRegNames[idxHstReg], idxVar));
    5828                 }
    5829                 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
    5830                 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
    5831             } while (fVars);
    5832         }
    5833 
     5834         /* Finally, check that the host register allocations match. */
    5835         AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
    5836                       ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
    5837                        pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
    5838                       IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
    5839     }
    5840 
    5841     /*
    5842      * Define the endif label and maybe the else one if we're still in the 'if' part.
    5843      */
    5844     if (!pEntry->fInElse)
    5845         iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
    5846     else
    5847         Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
    5848     iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
    5849 
     5850     /* Pop the conditional stack. */
    5851     pReNative->cCondDepth -= 1;
    5852 
    5853     return off;
    5854 }
    5855 
    5856 
    5857 #define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
    5858         off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
    5859         do {
    5860 
    5861 /** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
    5862 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
    5863 {
    5864     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    5865 
    5866     /* Get the eflags. */
    5867     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    5868                                                               kIemNativeGstRegUse_ReadOnly);
    5869 
    5870     /* Test and jump. */
    5871     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
    5872 
    5873     /* Free but don't flush the EFlags register. */
    5874     iemNativeRegFreeTmp(pReNative, idxEflReg);
    5875 
    5876     /* Make a copy of the core state now as we start the if-block. */
    5877     iemNativeCondStartIfBlock(pReNative, off);
    5878 
    5879     return off;
    5880 }
    5881 
    5882 
    5883 #define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
    5884         off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
    5885         do {
    5886 
    5887 /** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
    5888 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
    5889 {
    5890     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    5891 
    5892     /* Get the eflags. */
    5893     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    5894                                                               kIemNativeGstRegUse_ReadOnly);
    5895 
    5896     /* Test and jump. */
    5897     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
    5898 
    5899     /* Free but don't flush the EFlags register. */
    5900     iemNativeRegFreeTmp(pReNative, idxEflReg);
    5901 
    5902     /* Make a copy of the core state now as we start the if-block. */
    5903     iemNativeCondStartIfBlock(pReNative, off);
    5904 
    5905     return off;
    5906 }
    5907 
    5908 
    5909 #define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
    5910         off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
    5911         do {
    5912 
    5913 /** Emits code for IEM_MC_IF_EFL_BIT_SET. */
    5914 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
    5915 {
    5916     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    5917 
    5918     /* Get the eflags. */
    5919     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    5920                                                               kIemNativeGstRegUse_ReadOnly);
    5921 
    5922     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    5923     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    5924 
    5925     /* Test and jump. */
    5926     off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
    5927 
    5928     /* Free but don't flush the EFlags register. */
    5929     iemNativeRegFreeTmp(pReNative, idxEflReg);
    5930 
    5931     /* Make a copy of the core state now as we start the if-block. */
    5932     iemNativeCondStartIfBlock(pReNative, off);
    5933 
    5934     return off;
    5935 }
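
/* A rough sketch of how the conditional macros compose (illustrative only,
   using the macros defined above).  A decoder MC block such as

       IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF)
           ... if body ...
       IEM_MC_ELSE()
           ... else body ...
       IEM_MC_ENDIF();

   expands to approximately:

       off = iemNativeEmitIfEflagsBitSet(pReNative, off, X86_EFL_ZF); do {
           ... if body ...
       } while (0); off = iemNativeEmitElse(pReNative, off); do {
           ... else body ...
       } while (0); off = iemNativeEmitEndIf(pReNative, off);

   The do/while(0) wrappers only give each branch its own C scope at
   recompile time; the guest-visible branching goes via the Else/Endif
   labels created by iemNativeCondPushIf. */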
    5936 
    5937 
    5938 #define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
    5939         off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
    5940         do {
    5941 
    5942 /** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
    5943 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
    5944 {
    5945     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    5946 
    5947     /* Get the eflags. */
    5948     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    5949                                                               kIemNativeGstRegUse_ReadOnly);
    5950 
    5951     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    5952     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    5953 
    5954     /* Test and jump. */
    5955     off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
    5956 
    5957     /* Free but don't flush the EFlags register. */
    5958     iemNativeRegFreeTmp(pReNative, idxEflReg);
    5959 
    5960     /* Make a copy of the core state now as we start the if-block. */
    5961     iemNativeCondStartIfBlock(pReNative, off);
    5962 
    5963     return off;
    5964 }
    5965 
    5966 
    5967 #define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2)         \
    5968     off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
    5969     do {
    5970 
    5971 #define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2)         \
    5972     off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
    5973     do {
    5974 
    5975 /** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
    5976 DECL_INLINE_THROW(uint32_t)
    5977 iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    5978                                   uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
    5979 {
    5980     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    5981 
    5982     /* Get the eflags. */
    5983     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    5984                                                               kIemNativeGstRegUse_ReadOnly);
    5985 
    5986     unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
    5987     Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
    5988 
    5989     unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
    5990     Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
    5991     Assert(iBitNo1 != iBitNo2);
    5992 
    5993 #ifdef RT_ARCH_AMD64
    5994     uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
    5995 
    5996     off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    5997     if (iBitNo1 > iBitNo2)
    5998         off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
    5999     else
    6000         off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
    6001     off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    6002 
    6003 #elif defined(RT_ARCH_ARM64)
    6004     uint8_t const    idxTmpReg   = iemNativeRegAllocTmp(pReNative, &off);
    6005     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    6006 
    6007     /* and tmpreg, eflreg, #1<<iBitNo1 */
    6008     pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
    6009 
    6010     /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
    6011     if (iBitNo1 > iBitNo2)
    6012         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    6013                                                 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
    6014     else
    6015         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    6016                                                 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
    6017 
    6018     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    6019 
    6020 #else
    6021 # error "Port me"
    6022 #endif
    6023 
    6024     /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
    6025     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
    6026                                                      pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
    6027 
    6028     /* Free but don't flush the EFlags and tmp registers. */
    6029     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    6030     iemNativeRegFreeTmp(pReNative, idxEflReg);
    6031 
    6032     /* Make a copy of the core state now as we start the if-block. */
    6033     iemNativeCondStartIfBlock(pReNative, off);
    6034 
    6035     return off;
    6036 }
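
/* Worked example for the shift-and-xor trick above, assuming fBit1InEfl is
   X86_EFL_SF (bit 7) and fBit2InEfl is X86_EFL_OF (bit 11): iBitNo1=7 and
   iBitNo2=11, so tmpreg = (efl & RT_BIT_32(7)) << 4 moves SF up to bit 11;
   XORing that with efl leaves bit 11 equal to SF ^ OF, i.e. set exactly when
   the two flags differ, which is what the final bit test branches on. */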
    6037 
    6038 
    6039 #define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
    6040     off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
    6041     do {
    6042 
    6043 #define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
    6044     off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
    6045     do {
    6046 
    6047 /** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
    6048  *  IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
    6049 DECL_INLINE_THROW(uint32_t)
    6050 iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
    6051                                               uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
    6052 {
    6053     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    6054 
     6055     /* We need an if-block label for the inverted (OR) variant. */
    6056     uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
    6057                                                                  pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
    6058 
    6059     /* Get the eflags. */
    6060     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    6061                                                               kIemNativeGstRegUse_ReadOnly);
    6062 
    6063     /* Translate the flag masks to bit numbers. */
    6064     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    6065     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    6066 
    6067     unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
    6068     Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
    6069     Assert(iBitNo1 != iBitNo);
    6070 
    6071     unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
    6072     Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
    6073     Assert(iBitNo2 != iBitNo);
    6074     Assert(iBitNo2 != iBitNo1);
    6075 
    6076 #ifdef RT_ARCH_AMD64
    6077     uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
    6078 #elif defined(RT_ARCH_ARM64)
    6079     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    6080 #endif
    6081 
    6082     /* Check for the lone bit first. */
    6083     if (!fInverted)
    6084         off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
    6085     else
    6086         off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
    6087 
    6088     /* Then extract and compare the other two bits. */
    6089 #ifdef RT_ARCH_AMD64
    6090     off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    6091     if (iBitNo1 > iBitNo2)
    6092         off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
    6093     else
    6094         off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
    6095     off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    6096 
    6097 #elif defined(RT_ARCH_ARM64)
    6098     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    6099 
    6100     /* and tmpreg, eflreg, #1<<iBitNo1 */
    6101     pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
    6102 
    6103     /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
    6104     if (iBitNo1 > iBitNo2)
    6105         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    6106                                                 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
    6107     else
    6108         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    6109                                                 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
    6110 
    6111     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    6112 
    6113 #else
    6114 # error "Port me"
    6115 #endif
    6116 
    6117     /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
    6118     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
    6119                                                      pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
    6120 
    6121     /* Free but don't flush the EFlags and tmp registers. */
    6122     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    6123     iemNativeRegFreeTmp(pReNative, idxEflReg);
    6124 
    6125     /* Make a copy of the core state now as we start the if-block. */
    6126     iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
    6127 
    6128     return off;
    6129 }
    6130 
    6131 
    6132 #define IEM_MC_IF_CX_IS_NZ() \
    6133     off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
    6134     do {
    6135 
    6136 /** Emits code for IEM_MC_IF_CX_IS_NZ. */
    6137 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    6138 {
    6139     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    6140 
    6141     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    6142                                                                  kIemNativeGstRegUse_ReadOnly);
    6143     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
    6144     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    6145 
    6146     iemNativeCondStartIfBlock(pReNative, off);
    6147     return off;
    6148 }
    6149 
    6150 
    6151 #define IEM_MC_IF_ECX_IS_NZ() \
    6152     off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
    6153     do {
    6154 
    6155 #define IEM_MC_IF_RCX_IS_NZ() \
    6156     off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
    6157     do {
    6158 
    6159 /** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
    6160 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
    6161 {
    6162     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    6163 
    6164     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    6165                                                                  kIemNativeGstRegUse_ReadOnly);
    6166     off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
    6167     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    6168 
    6169     iemNativeCondStartIfBlock(pReNative, off);
    6170     return off;
    6171 }
    6172 
    6173 
    6174 #define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
    6175     off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
    6176     do {
    6177 
    6178 #define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
    6179     off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
    6180     do {
    6181 
     6182 /** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
    6183 DECL_INLINE_THROW(uint32_t)
    6184 iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
    6185 {
    6186     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    6187 
    6188     /* We have to load both RCX and EFLAGS before we can start branching,
    6189        otherwise we'll end up in the else-block with an inconsistent
    6190        register allocator state.
    6191        Doing EFLAGS first as it's more likely to be loaded, right? */
    6192     uint8_t const idxEflReg    = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    6193                                                                  kIemNativeGstRegUse_ReadOnly);
    6194     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    6195                                                                  kIemNativeGstRegUse_ReadOnly);
    6196 
    6197     /** @todo we could reduce this to a single branch instruction by spending a
    6198      *        temporary register and some setnz stuff.  Not sure if loops are
    6199      *        worth it. */
    6200     /* Check CX. */
    6201     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
    6202 
    6203     /* Check the EFlags bit. */
    6204     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    6205     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    6206     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
    6207                                                      !fCheckIfSet /*fJmpIfSet*/);
    6208 
    6209     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    6210     iemNativeRegFreeTmp(pReNative, idxEflReg);
    6211 
    6212     iemNativeCondStartIfBlock(pReNative, off);
    6213     return off;
    6214 }
    6215 
    6216 
    6217 #define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
    6218     off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
    6219     do {
    6220 
    6221 #define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
    6222     off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
    6223     do {
    6224 
    6225 #define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
    6226     off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
    6227     do {
    6228 
    6229 #define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
    6230     off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
    6231     do {
    6232 
    6233 /** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
    6234  *  IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
    6235  *  IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
    6236  *  IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
    6237 DECL_INLINE_THROW(uint32_t)
    6238 iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    6239                                                uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
    6240 {
    6241     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    6242 
    6243     /* We have to load both RCX and EFLAGS before we can start branching,
    6244        otherwise we'll end up in the else-block with an inconsistent
    6245        register allocator state.
    6246        Doing EFLAGS first as it's more likely to be loaded, right? */
    6247     uint8_t const idxEflReg    = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    6248                                                                  kIemNativeGstRegUse_ReadOnly);
    6249     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    6250                                                                  kIemNativeGstRegUse_ReadOnly);
    6251 
    6252     /** @todo we could reduce this to a single branch instruction by spending a
    6253      *        temporary register and some setnz stuff.  Not sure if loops are
    6254      *        worth it. */
    6255     /* Check RCX/ECX. */
    6256     off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
    6257 
    6258     /* Check the EFlags bit. */
    6259     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    6260     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    6261     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
    6262                                                      !fCheckIfSet /*fJmpIfSet*/);
    6263 
    6264     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    6265     iemNativeRegFreeTmp(pReNative, idxEflReg);
    6266 
    6267     iemNativeCondStartIfBlock(pReNative, off);
    6268     return off;
    6269 }
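
/* Usage note (an assumption about the callers, not stated above): conditions
   combining a non-zero CX/ECX/RCX test with an EFLAGS.ZF test in this way are
   what LOOPE/LOOPNE style instructions need, hence the combined
   IEM_MC_IF_XXX forms above rather than nesting two separate conditions. */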
    6270 
    6271 
    6272 
    6273 /*********************************************************************************************************************************
    6274 *   Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++                                                            *
    6275 *********************************************************************************************************************************/
    6276 /** Number of hidden arguments for CIMPL calls.
     6277  * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
    6278 #if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
    6279 # define IEM_CIMPL_HIDDEN_ARGS 3
    6280 #else
    6281 # define IEM_CIMPL_HIDDEN_ARGS 2
    6282 #endif
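
/* The extra hidden argument in the strict Windows/AMD64 case is presumably
   the hidden return-buffer pointer the Microsoft x64 calling convention adds
   when VBOXSTRICTRC is a class rather than a plain integer. */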
    6283 
    6284 #define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
    6285     uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
    6286 
    6287 #define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
    6288     uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
    6289 
    6290 #define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
    6291     uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
    6292 
    6293 #define IEM_MC_LOCAL(a_Type, a_Name) \
    6294     uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
    6295 
    6296 #define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
    6297     uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
    6298 
    6299 
    6300 /**
    6301  * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
    6302  */
    6303 DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
    6304 {
    6305     if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
    6306         return IEM_CIMPL_HIDDEN_ARGS;
    6307     if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
    6308         return 1;
    6309     return 0;
    6310 }
    6311 
    6312 
    6313 /**
    6314  * Internal work that allocates a variable with kind set to
    6315  * kIemNativeVarKind_Invalid and no current stack allocation.
    6316  *
    6317  * The kind will either be set by the caller or later when the variable is first
    6318  * assigned a value.
    6319  */
    6320 static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
    6321 {
    6322     Assert(cbType > 0 && cbType <= 64);
    6323     unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
    6324     AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
    6325     pReNative->Core.bmVars |= RT_BIT_32(idxVar);
    6326     pReNative->Core.aVars[idxVar].enmKind        = kIemNativeVarKind_Invalid;
    6327     pReNative->Core.aVars[idxVar].cbVar          = cbType;
    6328     pReNative->Core.aVars[idxVar].idxStackSlot   = UINT8_MAX;
    6329     pReNative->Core.aVars[idxVar].idxReg         = UINT8_MAX;
    6330     pReNative->Core.aVars[idxVar].uArgNo         = UINT8_MAX;
    6331     pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
    6332     pReNative->Core.aVars[idxVar].enmGstReg      = kIemNativeGstReg_End;
    6333     pReNative->Core.aVars[idxVar].fRegAcquired   = false;
    6334     pReNative->Core.aVars[idxVar].u.uValue       = 0;
    6335     return idxVar;
    6336 }
    6337 
    6338 
    6339 /**
    6340  * Internal work that allocates an argument variable w/o setting enmKind.
    6341  */
    6342 static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
    6343 {
    6344     iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
    6345     AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
    6346     AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
    6347 
    6348     uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
    6349     pReNative->Core.aidxArgVars[iArgNo]  = idxVar;
    6350     pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
    6351     return idxVar;
    6352 }
    6353 
    6354 
    6355 /**
    6356  * Gets the stack slot for a stack variable, allocating one if necessary.
    6357  *
    6358  * Calling this function implies that the stack slot will contain a valid
    6359  * variable value.  The caller deals with any register currently assigned to the
    6360  * variable, typically by spilling it into the stack slot.
    6361  *
    6362  * @returns The stack slot number.
    6363  * @param   pReNative   The recompiler state.
    6364  * @param   idxVar      The variable.
    6365  * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS
    6366  */
    6367 DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    6368 {
    6369     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    6370     Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
    6371 
    6372     /* Already got a slot? */
    6373     uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
    6374     if (idxStackSlot != UINT8_MAX)
    6375     {
    6376         Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
    6377         return idxStackSlot;
    6378     }
    6379 
    6380     /*
    6381      * A single slot is easy to allocate.
    6382      * Allocate them from the top end, closest to BP, to reduce the displacement.
    6383      */
    6384     if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
    6385     {
    6386         unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
    6387         AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    6388         pReNative->Core.bmStack |= RT_BIT_32(iSlot);
    6389         pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
     6390         Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
    6391         return (uint8_t)iSlot;
    6392     }
    6393 
    6394     /*
    6395      * We need more than one stack slot.
    6396      *
    6397      * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
    6398      */
    6399     AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
    6400     Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
    6401     uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
    6402     uint32_t       fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
    6403     uint32_t       bmStack       = ~pReNative->Core.bmStack;
    6404     while (bmStack != UINT32_MAX)
    6405     {
    6406 /** @todo allocate from the top to reduce BP displacement. */
    6407         unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
    6408         AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    6409         if (!(iSlot & fBitAlignMask))
    6410         {
    6411             if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
    6412             {
    6413                 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
    6414                 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
     6415                 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
    6416                        idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
    6417                 return (uint8_t)iSlot;
    6418             }
    6419         }
    6420         bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
    6421     }
    6422     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    6423 }
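
/* Rough worked example of the multi-slot math above: for cbVar=32,
   ASMBitLastSetU32(32) is 6, so fBitAlignMask = RT_BIT_32(6 - 4) - 1 = 3 and
   fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = 0xf; the loop thus searches
   for four consecutive free 8-byte slots starting at a slot index that is a
   multiple of four. */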
    6424 
    6425 
    6426 /**
    6427  * Changes the variable to a stack variable.
    6428  *
    6429  * Currently this is s only possible to do the first time the variable is used,
     6430  * Currently this is only possible to do the first time the variable is used;
     6431  * switching to stack later can be implemented but hasn't been done.
    6432  * @param   pReNative   The recompiler state.
    6433  * @param   idxVar      The variable.
    6434  * @throws  VERR_IEM_VAR_IPE_2
    6435  */
    6436 static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    6437 {
    6438     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    6439     if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
    6440     {
    6441         /* We could in theory transition from immediate to stack as well, but it
    6442            would involve the caller doing work storing the value on the stack. So,
    6443            till that's required we only allow transition from invalid. */
    6444         AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
    6445                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    6446         AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    6447         pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
    6448 
    6449         /* Note! We don't allocate a stack slot here, that's only done when a
    6450                  slot is actually needed to hold a variable value. */
    6451     }
    6452 }
    6453 
    6454 
    6455 /**
     6456  * Sets the variable to a constant value.
    6457  *
    6458  * This does not require stack storage as we know the value and can always
    6459  * reload it, unless of course it's referenced.
    6460  *
    6461  * @param   pReNative   The recompiler state.
    6462  * @param   idxVar      The variable.
    6463  * @param   uValue      The immediate value.
    6464  * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
    6465  */
    6466 static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
    6467 {
    6468     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    6469     if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
    6470     {
    6471         /* Only simple transitions for now. */
    6472         AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
    6473                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    6474         pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
    6475     }
    6476     AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    6477 
    6478     pReNative->Core.aVars[idxVar].u.uValue = uValue;
    6479     AssertMsg(   pReNative->Core.aVars[idxVar].cbVar >= sizeof(uint64_t)
    6480               || pReNative->Core.aVars[idxVar].u.uValue < RT_BIT_64(pReNative->Core.aVars[idxVar].cbVar * 8),
    6481               ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pReNative->Core.aVars[idxVar].cbVar, uValue));
    6482 }
    6483 
    6484 
    6485 /**
    6486  * Sets the variable to a reference (pointer) to @a idxOtherVar.
    6487  *
    6488  * This does not require stack storage as we know the value and can always
    6489  * reload it.  Loading is postponed till needed.
    6490  *
    6491  * @param   pReNative   The recompiler state.
    6492  * @param   idxVar      The variable.
    6493  * @param   idxOtherVar The variable to take the (stack) address of.
    6494  *
    6495  * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
    6496  */
    6497 static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
    6498 {
    6499     Assert(idxVar      < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
    6500     Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
    6501 
    6502     if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
    6503     {
    6504         /* Only simple transitions for now. */
    6505         AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
    6506                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    6507         pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
    6508     }
    6509     AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    6510 
    6511     pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
    6512 
    6513     /* Update the other variable, ensure it's a stack variable. */
    6514     /** @todo handle variables with const values... that'll go boom now. */
    6515     pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
    6516     iemNativeVarSetKindToStack(pReNative, idxOtherVar);
    6517 }
    6518 
    6519 
    6520 /**
    6521  * Sets the variable to a reference (pointer) to a guest register reference.
    6522  *
    6523  * This does not require stack storage as we know the value and can always
    6524  * reload it.  Loading is postponed till needed.
    6525  *
    6526  * @param   pReNative       The recompiler state.
    6527  * @param   idxVar          The variable.
     6528  * @param   enmRegClass     The class of guest registers to reference.
    6529  * @param   idxReg          The register within @a enmRegClass to reference.
    6530  *
    6531  * @throws  VERR_IEM_VAR_IPE_2
    6532  */
    6533 static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
    6534                                            IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
    6535 {
    6536     Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
    6537 
    6538     if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
    6539     {
    6540         /* Only simple transitions for now. */
    6541         AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
    6542                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    6543         pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
    6544     }
    6545     AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    6546 
    6547     pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
    6548     pReNative->Core.aVars[idxVar].u.GstRegRef.idx      = idxReg;
    6549 }
    6550 
    6551 
    6552 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
    6553 {
    6554     return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
    6555 }
    6556 
    6557 
    6558 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
    6559 {
    6560     uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
    6561 
    6562     /* Since we're using a generic uint64_t value type, we must truncate it if
     6563        the variable is smaller, otherwise we may end up with too large a value
     6564        when scaling up an imm8 w/ sign-extension.
    6565 
    6566        This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
     6567        in the bios, bx=1) when running on arm, because clang expects 16-bit
    6568        register parameters to have bits 16 and up set to zero.  Instead of
    6569        setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
    6570        CF value in the result.  */
    6571     switch (cbType)
    6572     {
    6573         case sizeof(uint8_t):   uValue &= UINT64_C(0xff); break;
    6574         case sizeof(uint16_t):  uValue &= UINT64_C(0xffff); break;
    6575         case sizeof(uint32_t):  uValue &= UINT64_C(0xffffffff); break;
    6576     }
    6577     iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
    6578     return idxVar;
    6579 }
    6580 
    6581 
    6582 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
    6583 {
    6584     AssertStmt(   idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
    6585                && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
    6586                && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
    6587                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
    6588 
    6589     uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
    6590     iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
    6591     return idxArgVar;
    6592 }
    6593 
    6594 
    6595 DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
    6596 {
    6597     uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
    6598     /* Don't set to stack now, leave that to the first use as for instance
    6599        IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
    6600     return idxVar;
    6601 }
    6602 
    6603 
    6604 DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
    6605 {
    6606     uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
    6607 
    6608     /* Since we're using a generic uint64_t value type, we must truncate it if
     6609        the variable is smaller, otherwise we may end up with too large a value
     6610        when scaling up an imm8 w/ sign-extension. */
    6611     switch (cbType)
    6612     {
    6613         case sizeof(uint8_t):   uValue &= UINT64_C(0xff); break;
    6614         case sizeof(uint16_t):  uValue &= UINT64_C(0xffff); break;
    6615         case sizeof(uint32_t):  uValue &= UINT64_C(0xffffffff); break;
    6616     }
    6617     iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
    6618     return idxVar;
    6619 }
    6620 
    6621 
    6622 /**
    6623  * Releases the variable's register.
    6624  *
    6625  * The register must have been previously acquired calling
    6626  * iemNativeVarRegisterAcquire(), iemNativeVarRegisterAcquireForGuestReg() or
    6627  * iemNativeVarRegisterSetAndAcquire().
    6628  */
    6629 DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    6630 {
    6631     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    6632     Assert(pReNative->Core.aVars[idxVar].fRegAcquired);
    6633     pReNative->Core.aVars[idxVar].fRegAcquired = false;
    6634 }
    6635 
    6636 
    6637 /**
    6638  * Makes sure variable @a idxVar has a register assigned to it and that it stays
    6639  * fixed till we call iemNativeVarRegisterRelease.
    6640  *
    6641  * @returns The host register number.
    6642  * @param   pReNative   The recompiler state.
    6643  * @param   idxVar      The variable.
    6644  * @param   poff        Pointer to the instruction buffer offset.
     6645  *                      Updated in case a register needs to be freed up or the
     6646  *                      value loaded off the stack.
    6647  * @param  fInitialized Set if the variable must already have been initialized.
    6648  *                      Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
    6649  *                      the case.
    6650  * @param  idxRegPref   Preferred register number or UINT8_MAX.
    6651  */
    6652 DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
    6653                                                        bool fInitialized = false, uint8_t idxRegPref = UINT8_MAX)
    6654 {
    6655     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    6656     Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
    6657     Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
    6658 
    6659     uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
    6660     if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    6661     {
    6662         Assert(   pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
    6663                && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
    6664         pReNative->Core.aVars[idxVar].fRegAcquired = true;
    6665         return idxReg;
    6666     }
    6667 
    6668     /*
    6669      * If the kind of variable has not yet been set, default to 'stack'.
    6670      */
    6671     Assert(   pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
    6672            && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
    6673     if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
    6674         iemNativeVarSetKindToStack(pReNative, idxVar);
    6675 
    6676     /*
    6677      * We have to allocate a register for the variable, even if it's a stack one,
    6678      * as we don't know if there are modifications being made to it before it's
    6679      * finalized (todo: analyze and insert hints about that?).
    6680      *
    6681      * If we can, we try to get the correct register for argument variables.  This
    6682      * assumes that most argument variables are fetched as close as possible to
    6683      * the actual call, so that there aren't any interfering hidden calls
    6684      * (memory accesses, etc.) in between.
    6685      *
    6686      * If we cannot, or it's a regular (non-argument) variable, we make sure no
    6687      * argument registers that this MC block will use are allocated here, and we
    6688      * always prefer non-volatile registers to avoid having to spill stuff for
    6689      * internal calls.
    6690      */
    6691     /** @todo Have the python script detect too early argument value fetches and
    6692      *  warn about hidden calls causing less optimal code to be generated. */
    6693 
    6694     uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
    6695     if (   uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
    6696         && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
    6697     {
    6698         idxReg = g_aidxIemNativeCallRegs[uArgNo];
    6699         iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    6700         Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
    6701     }
    6702     else if (   idxRegPref < RT_ELEMENTS(pReNative->Core.aHstRegs)
    6703              || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
    6704     {
    6705         uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
    6706         uint32_t const fRegs        = ~pReNative->Core.bmHstRegs
    6707                                     & ~pReNative->Core.bmHstRegsWithGstShadow
    6708                                     & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
    6709                                     & fNotArgsMask;
    6710         if (fRegs)
    6711         {
    6712             /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
    6713             idxReg = (uint8_t)ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    6714                                                ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    6715             Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    6716             Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    6717             Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
    6718         }
    6719         else
    6720         {
    6721             idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
    6722                                                IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
    6723             AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
    6724             Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
    6725         }
    6726     }
    6727     else
    6728     {
    6729         idxReg = idxRegPref;
    6730         iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    6731         Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (preferred)\n", idxVar, idxReg));
    6732     }
    6733     iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
    6734     pReNative->Core.aVars[idxVar].idxReg = idxReg;
    6735 
    6736     /*
    6737      * Load it off the stack if we've got a stack slot.
    6738      */
    6739     uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
    6740     if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
    6741     {
    6742         int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
    6743         switch (pReNative->Core.aVars[idxVar].cbVar)
    6744         {
    6745             case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
    6746             case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
    6747             case 3: AssertFailed(); RT_FALL_THRU();
    6748             case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
    6749             default: AssertFailed(); RT_FALL_THRU();
    6750             case 8: *poff = iemNativeEmitLoadGprByBp(   pReNative, *poff, idxReg, offDispBp); break;
    6751         }
    6752     }
    6753     else
    6754     {
    6755         Assert(idxStackSlot == UINT8_MAX);
    6756         AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    6757     }
    6758     pReNative->Core.aVars[idxVar].fRegAcquired = true;
    6759     return idxReg;
    6760 }
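
/* Typical usage sketch (illustrative only; iemNativeVarAssignToSmaller further down follows
   this exact pattern): acquire the host register, emit whatever code operates on it, then
   release it so the allocator is free to spill or reuse it:
   @code
        uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
        // ... emit instructions that use idxReg ...
        iemNativeVarRegisterRelease(pReNative, idxVar);
   @endcode
   Passing fInitialized=true additionally guarantees the value was loaded off the stack (or
   a VERR_IEM_VAR_NOT_INITIALIZED longjmp is made). */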
    6761 
    6762 
    6763 /**
    6764  * The value of variable @a idxVar will be written in full to the @a enmGstReg
    6765  * guest register.
    6766  *
    6767  * This function makes sure there is a register for it and sets it to be the
    6768  * current shadow copy of @a enmGstReg.
    6769  *
    6770  * @returns The host register number.
    6771  * @param   pReNative   The recompiler state.
    6772  * @param   idxVar      The variable.
    6773  * @param   enmGstReg   The guest register this variable will be written to
    6774  *                      after this call.
    6775  * @param   poff        Pointer to the instruction buffer offset.
    6776  *                      In case a register needs to be freed up or if the
    6777  *                      variable content needs to be loaded off the stack.
    6778  *
    6779  * @note    We DO NOT expect @a idxVar to be an argument variable,
    6780  *          because this function is only used in the commit stage of an
    6781  *          instruction.
    6782  */
    6783 DECL_HIDDEN_THROW(uint8_t)
    6784 iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
    6785 {
    6786     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    6787     Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
    6788     AssertMsgStmt(   pReNative->Core.aVars[idxVar].cbVar <= 8
    6789                   && (   pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
    6790                       || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
    6791                   ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
    6792                    pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
    6793                   IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
    6794 
    6795     /*
    6796      * This shouldn't ever be used for arguments, unless it's in a weird else
    6797      * branch that doesn't do any calling and even then it's questionable.
    6798      *
    6799      * However, in case someone writes crazy wrong MC code and does register
    6800      * updates before making calls, just use the regular register allocator to
    6801      * ensure we get a register suitable for the intended argument number.
    6802      */
    6803     AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
    6804 
    6805     /*
    6806      * If there is already a register for the variable, we transfer/set the
    6807      * guest shadow copy assignment to it.
    6808      */
    6809     uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
    6810     if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    6811     {
    6812         if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    6813         {
    6814             uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
    6815             iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
    6816             Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
    6817                    g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
    6818         }
    6819         else
    6820         {
    6821             iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
    6822             Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
    6823                    g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
    6824         }
    6825         /** @todo figure this one out. We need some way of making sure the register isn't
    6826          * modified after this point, just in case we start writing crappy MC code. */
    6827         pReNative->Core.aVars[idxVar].enmGstReg    = enmGstReg;
    6828         pReNative->Core.aVars[idxVar].fRegAcquired = true;
    6829         return idxReg;
    6830     }
    6831     Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
    6832 
    6833     /*
    6834      * Because this is supposed to be the commit stage, we just tag along with the
    6835      * temporary register allocator and upgrade its allocation to a variable register.
    6836      */
    6837     idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
    6838     Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
    6839     Assert(pReNative->Core.aHstRegs[idxReg].idxVar  == UINT8_MAX);
    6840     pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
    6841     pReNative->Core.aHstRegs[idxReg].idxVar  = idxVar;
    6842     pReNative->Core.aVars[idxVar].idxReg     = idxReg;
    6843 
    6844     /*
    6845      * Now we need to load the register value.
    6846      */
    6847     if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
    6848         *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
    6849     else
    6850     {
    6851         uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    6852         int32_t const offDispBp    = iemNativeStackCalcBpDisp(idxStackSlot);
    6853         switch (pReNative->Core.aVars[idxVar].cbVar)
    6854         {
    6855             case sizeof(uint64_t):
    6856                 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
    6857                 break;
    6858             case sizeof(uint32_t):
    6859                 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
    6860                 break;
    6861             case sizeof(uint16_t):
    6862                 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
    6863                 break;
    6864             case sizeof(uint8_t):
    6865                 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
    6866                 break;
    6867             default:
    6868                 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
    6869         }
    6870     }
    6871 
    6872     pReNative->Core.aVars[idxVar].fRegAcquired = true;
    6873     return idxReg;
    6874 }
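
/* Commit stage sketch (illustrative; the store emitter is left as a placeholder since it
   depends on which guest register is being written): acquire a register that will become
   the shadow copy of the guest register, emit the store, then release:
   @code
        uint8_t const idxReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVar,
                                                                      kIemNativeGstReg_Pc, &off);
        // ... emit the store of idxReg into the corresponding CPUMCTX field here ...
        iemNativeVarRegisterRelease(pReNative, idxVar);
   @endcode
   Since the register is marked as shadowing the guest register, later code can reuse it
   instead of reloading the value from CPUMCTX. */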
    6875 
    6876 
    6877 /**
    6878  * Sets the host register for @a idxVarRc to @a idxReg.
    6879  *
    6880  * The register must not be allocated. Any guest register shadowing will be
    6881  * implicitly dropped by this call.
    6882  *
    6883  * The variable must not have any register associated with it (causes
    6884  * VERR_IEM_VAR_IPE_10 to be raised).  Conversion to a stack variable is
    6885  * implied.
    6886  *
    6887  * @returns idxReg
    6888  * @param   pReNative   The recompiler state.
    6889  * @param   idxVar      The variable.
    6890  * @param   idxReg      The host register (typically IEMNATIVE_CALL_RET_GREG).
    6891  * @param   off         For recording in debug info.
    6892  *
    6893  * @throws  VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
    6894  */
    6895 DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
    6896 {
    6897     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    6898     Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
    6899     Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    6900     AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
    6901     AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
    6902 
    6903     iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
    6904     iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
    6905 
    6906     iemNativeVarSetKindToStack(pReNative, idxVar);
    6907     pReNative->Core.aVars[idxVar].idxReg = idxReg;
    6908 
    6909     return idxReg;
    6910 }
    6911 
    6912 
    6913 /**
    6914  * A convenient helper function.
    6915  */
    6916 DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
    6917                                                              uint8_t idxReg, uint32_t *poff)
    6918 {
    6919     idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
    6920     pReNative->Core.aVars[idxVar].fRegAcquired = true;
    6921     return idxReg;
    6922 }
    6923 
    6924 
    6925 /**
    6926  * Worker that frees the stack slots for variable @a idxVar if any allocated.
    6927  *
    6928  * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
    6929  */
    6930 DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    6931 {
    6932     uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
    6933     if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
    6934     {
    6935         uint8_t const  cbVar      = pReNative->Core.aVars[idxVar].cbVar;
    6936         uint8_t const  cSlots     = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
    6937         uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
    6938         Assert(cSlots > 0);
    6939         Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
    6940         Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
    6941         pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
    6942         pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
    6943     }
    6944     else
    6945         Assert(idxStackSlot == UINT8_MAX);
    6946 }
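
/* Worked example (informal, assuming a hypothetical 16 byte variable): cSlots would be
   (16 + 7) / 8 = 2 and fAllocMask = RT_BIT_32(2) - 1 = 0x3, so with idxStackSlot = 4 the
   code above clears stack slot bits 4 and 5, i.e. bmStack &= ~(0x3 << 4). */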
    6947 
    6948 
    6949 /**
    6950  * Worker that frees a single variable.
    6951  *
    6952  * ASSUMES that @a idxVar is valid.
    6953  */
    6954 DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    6955 {
    6956     Assert(   pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid  /* Including invalid as we may have unused */
    6957            && pReNative->Core.aVars[idxVar].enmKind <  kIemNativeVarKind_End);    /* variables in conditional branches. */
    6958 
    6959     /* Free the host register first if any assigned. */
    6960     uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    6961     if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    6962     {
    6963         Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
    6964         pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
    6965         pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    6966     }
    6967 
    6968     /* Free argument mapping. */
    6969     uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
    6970     if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
    6971         pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
    6972 
    6973     /* Free the stack slots. */
    6974     iemNativeVarFreeStackSlots(pReNative, idxVar);
    6975 
    6976     /* Free the actual variable. */
    6977     pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
    6978     pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
    6979 }
    6980 
    6981 
    6982 /**
    6983  * Worker for iemNativeVarFreeAll that's called when there is anything to do.
    6984  */
    6985 DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
    6986 {
    6987     while (bmVars != 0)
    6988     {
    6989         uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
    6990         bmVars &= ~RT_BIT_32(idxVar);
    6991 
    6992 #if 1 /** @todo optimize by simplifying this later... */
    6993         iemNativeVarFreeOneWorker(pReNative, idxVar);
    6994 #else
    6995         /* Only need to free the host register, the rest is done as bulk updates below. */
    6996         uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    6997         if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    6998         {
    6999             Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
    7000             pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
    7001             pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    7002         }
    7003 #endif
    7004     }
    7005 #if 0 /** @todo optimize by simplifying this later... */
    7006     pReNative->Core.bmVars     = 0;
    7007     pReNative->Core.bmStack    = 0;
    7008     pReNative->Core.u64ArgVars = UINT64_MAX;
    7009 #endif
    7010 }
    7011 
    7012 
    7013 /**
    7014  * This is called by IEM_MC_END() to clean up all variables.
    7015  */
    7016 DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
    7017 {
    7018     uint32_t const bmVars = pReNative->Core.bmVars;
    7019     if (bmVars != 0)
    7020         iemNativeVarFreeAllSlow(pReNative, bmVars);
    7021     Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
    7022     Assert(pReNative->Core.bmStack    == 0);
    7023 }
    7024 
    7025 
    7026 #define IEM_MC_FREE_LOCAL(a_Name)   iemNativeVarFreeLocal(pReNative, a_Name)
    7027 
    7028 /**
    7029  * This is called by IEM_MC_FREE_LOCAL.
    7030  */
    7031 DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    7032 {
    7033     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    7034     Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
    7035     iemNativeVarFreeOneWorker(pReNative, idxVar);
    7036 }
    7037 
    7038 
    7039 #define IEM_MC_FREE_ARG(a_Name)     iemNativeVarFreeArg(pReNative, a_Name)
    7040 
    7041 /**
    7042  * This is called by IEM_MC_FREE_ARG.
    7043  */
    7044 DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    7045 {
    7046     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    7047     Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
    7048     iemNativeVarFreeOneWorker(pReNative, idxVar);
    7049 }
    7050 
    7051 
    7052 #define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
    7053 
    7054 /**
    7055  * This is called by IEM_MC_ASSIGN_TO_SMALLER.
    7056  */
    7057 DECL_INLINE_THROW(uint32_t)
    7058 iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
    7059 {
    7060     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
    7061     AssertStmt(pReNative->Core.aVars[idxVarDst].enmKind == kIemNativeVarKind_Invalid,
    7062                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    7063     Assert(   pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint16_t)
    7064            || pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint32_t));
    7065 
    7066     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
    7067     AssertStmt(   pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Stack
    7068                || pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate,
    7069                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    7070 
    7071     Assert(pReNative->Core.aVars[idxVarDst].cbVar < pReNative->Core.aVars[idxVarSrc].cbVar);
    7072 
    7073     /*
    7074      * Special case for immediates.
    7075      */
    7076     if (pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate)
    7077     {
    7078         switch (pReNative->Core.aVars[idxVarDst].cbVar)
    7079         {
    7080             case sizeof(uint16_t):
    7081                 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
    7082                 break;
    7083             case sizeof(uint32_t):
    7084                 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
    7085                 break;
    7086             default: AssertFailed(); break;
    7087         }
    7088     }
    7089     else
    7090     {
    7091         /*
    7092          * The generic solution for now.
    7093          */
    7094         /** @todo optimize this by having the python script make sure the source
    7095          *        variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
    7096          *        statement.   Then we could just transfer the register assignments. */
    7097         uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
    7098         uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
    7099         switch (pReNative->Core.aVars[idxVarDst].cbVar)
    7100         {
    7101             case sizeof(uint16_t):
    7102                 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
    7103                 break;
    7104             case sizeof(uint32_t):
    7105                 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
    7106                 break;
    7107             default: AssertFailed(); break;
    7108         }
    7109         iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    7110         iemNativeVarRegisterRelease(pReNative, idxVarDst);
    7111     }
    7112     return off;
    7113 }
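
/* Example (informal): assigning a 64-bit immediate source holding 0x0000123456789abc to a
   32-bit destination takes the immediate special case above and simply turns the destination
   into the constant 0x56789abc without emitting any code; only the generic path acquires
   registers and emits a 16- or 32-bit register-to-register move. */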
    7114 
    7115 
    7116 
    7117 /*********************************************************************************************************************************
    7118 *   Emitters for IEM_MC_CALL_CIMPL_XXX                                                                                           *
    7119 *********************************************************************************************************************************/
    7120 
    7121 /**
    7122  * Emits code to load a reference to the given guest register into @a idxGprDst.
    7123  */
    7124 DECL_INLINE_THROW(uint32_t)
    7125 iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
    7126                                IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
    7127 {
    7128     /*
    7129      * Get the offset relative to the CPUMCTX structure.
    7130      */
    7131     uint32_t offCpumCtx;
    7132     switch (enmClass)
    7133     {
    7134         case kIemNativeGstRegRef_Gpr:
    7135             Assert(idxRegInClass < 16);
    7136             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
    7137             break;
    7138 
    7139         case kIemNativeGstRegRef_GprHighByte:    /**< AH, CH, DH, BH */
    7140             Assert(idxRegInClass < 4);
    7141             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
    7142             break;
    7143 
    7144         case kIemNativeGstRegRef_EFlags:
    7145             Assert(idxRegInClass == 0);
    7146             offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
    7147             break;
    7148 
    7149         case kIemNativeGstRegRef_MxCsr:
    7150             Assert(idxRegInClass == 0);
    7151             offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
    7152             break;
    7153 
    7154         case kIemNativeGstRegRef_FpuReg:
    7155             Assert(idxRegInClass < 8);
    7156             AssertFailed(); /** @todo what kind of indexing? */
    7157             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
    7158             break;
    7159 
    7160         case kIemNativeGstRegRef_MReg:
    7161             Assert(idxRegInClass < 8);
    7162             AssertFailed(); /** @todo what kind of indexing? */
    7163             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
    7164             break;
    7165 
    7166         case kIemNativeGstRegRef_XReg:
    7167             Assert(idxRegInClass < 16);
    7168             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
    7169             break;
    7170 
    7171         default:
    7172             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
    7173     }
    7174 
    7175     /*
    7176      * Load the address into the destination register.
    7177      */
    7178 #ifdef RT_ARCH_AMD64
    7179     off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
    7180 
    7181 #elif defined(RT_ARCH_ARM64)
    7182     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    7183     Assert(offCpumCtx < 4096);
    7184     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
    7185 
    7186 #else
    7187 # error "Port me!"
    7188 #endif
    7189 
    7190     return off;
    7191 }
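
/* Worked example (informal): for kIemNativeGstRegRef_GprHighByte with idxRegInClass = 3 (BH),
   offCpumCtx becomes RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + 3 * sizeof(CPUMCTXGREG), i.e.
   the high byte of the fourth general register entry, and the emitted LEA/ADD leaves the
   address of that byte within pVCpu->cpum.GstCtx in idxGprDst. */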
    7192 
    7193 
    7194 /**
    7195  * Common code for CIMPL and AIMPL calls.
    7196  *
    7197  * These are calls that use argument variables and such.  They should not be
    7198  * confused with internal calls required to implement an MC operation,
    7199  * like a TLB load and similar.
    7200  *
    7201  * Upon return all that is left to do is to load any hidden arguments and
    7202  * perform the call. All argument variables are freed.
    7203  *
    7204  * @returns New code buffer offset; throws VBox status code on error.
    7205  * @param   pReNative       The native recompile state.
    7206  * @param   off             The code buffer offset.
    7207  * @param   cArgs           The total number of arguments (includes hidden
    7208  *                          count).
    7209  * @param   cHiddenArgs     The number of hidden arguments.  The hidden
    7210  *                          arguments must not have any variable declared for
    7211  *                          them, whereas all the regular arguments must
    7212  *                          (tstIEMCheckMc ensures this).
    7213  */
    7214 DECL_HIDDEN_THROW(uint32_t)
    7215 iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
    7216 {
    7217 #ifdef VBOX_STRICT
    7218     /*
    7219      * Assert sanity.
    7220      */
    7221     Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
    7222     Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
    7223     for (unsigned i = 0; i < cHiddenArgs; i++)
    7224         Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
    7225     for (unsigned i = cHiddenArgs; i < cArgs; i++)
    7226     {
    7227         Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
    7228         Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
    7229     }
    7230     iemNativeRegAssertSanity(pReNative);
    7231 #endif
    7232 
    7233     /*
    7234      * Before we do anything else, go over variables that are referenced and
    7235      * make sure they are not in a register.
    7236      */
    7237     uint32_t bmVars = pReNative->Core.bmVars;
    7238     if (bmVars)
    7239     {
    7240         do
    7241         {
    7242             uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
    7243             bmVars &= ~RT_BIT_32(idxVar);
    7244 
    7245             if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
    7246             {
    7247                 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
    7248                 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
    7249                 {
    7250                     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    7251                     Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
    7252                            idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
    7253                            idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    7254                     off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
    7255 
    7256                     pReNative->Core.aVars[idxVar].idxReg    = UINT8_MAX;
    7257                     pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxRegOld);
    7258                     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
    7259                     pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
    7260                     pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
    7261                 }
    7262             }
    7263         } while (bmVars != 0);
    7264 #if 0 //def VBOX_STRICT
    7265         iemNativeRegAssertSanity(pReNative);
    7266 #endif
    7267     }
    7268 
    7269     uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
    7270 
    7271     /*
    7272      * First, go over the host registers that will be used for arguments and make
    7273      * sure they either hold the desired argument or are free.
    7274      */
    7275     if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
    7276     {
    7277         for (uint32_t i = 0; i < cRegArgs; i++)
    7278         {
    7279             uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
    7280             if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
    7281             {
    7282                 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
    7283                 {
    7284                     uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
    7285                     Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
    7286                     Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
    7287                     uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
    7288                     if (uArgNo == i)
    7289                     { /* perfect */ }
    7290                     /* The variable allocator logic should make sure this is impossible,
    7291                        except for when the return register is used as a parameter (ARM,
    7292                        but not x86). */
    7293 #if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
    7294                     else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
    7295                     {
    7296 # ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    7297 #  error "Implement this"
    7298 # endif
    7299                         Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
    7300                         uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
    7301                         AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
    7302                                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
    7303                         off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
    7304                     }
    7305 #endif
    7306                     else
    7307                     {
    7308                         AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
    7309 
    7310                         if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    7311                             off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
    7312                         else
    7313                         {
    7314                             /* just free it, can be reloaded if used again */
    7315                             pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    7316                             pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
    7317                             iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
    7318                         }
    7319                     }
    7320                 }
    7321                 else
    7322                     AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
    7323                                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
    7324             }
    7325         }
    7326 #if 0 //def VBOX_STRICT
    7327         iemNativeRegAssertSanity(pReNative);
    7328 #endif
    7329     }
    7330 
    7331     Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
    7332 
    7333 #ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    7334     /*
    7335      * If there are any stack arguments, make sure they are in their place as well.
    7336      *
    7337      * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
    7338      * the caller) will be loading it later and it must be free (see the first loop).
    7339      */
    7340     if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
    7341     {
    7342         for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
    7343         {
    7344             uint8_t const idxVar    = pReNative->Core.aidxArgVars[i];
    7345             int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
    7346             if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    7347             {
    7348                 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
    7349                 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
    7350                 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
    7351                 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    7352             }
    7353             else
    7354             {
    7355                 /* Use ARG0 as temp for stuff we need registers for. */
    7356                 switch (pReNative->Core.aVars[idxVar].enmKind)
    7357                 {
    7358                     case kIemNativeVarKind_Stack:
    7359                     {
    7360                         uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
    7361                         AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    7362                         off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
    7363                                                        iemNativeStackCalcBpDisp(idxStackSlot));
    7364                         off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
    7365                         continue;
    7366                     }
    7367 
    7368                     case kIemNativeVarKind_Immediate:
    7369                         off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
    7370                         continue;
    7371 
    7372                     case kIemNativeVarKind_VarRef:
    7373                     {
    7374                         uint8_t const idxOtherVar    = pReNative->Core.aVars[idxVar].u.idxRefVar;
    7375                         Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
    7376                         uint8_t const idxStackSlot   = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
    7377                         int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
    7378                         uint8_t const idxRegOther    = pReNative->Core.aVars[idxOtherVar].idxReg;
    7379                         if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
    7380                         {
    7381                             off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
    7382                             iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
    7383                             Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
    7384                         }
    7385                         Assert(   pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
    7386                                && pReNative->Core.aVars[idxOtherVar].idxReg       == UINT8_MAX);
    7387                         off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
    7388                         off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
    7389                         continue;
    7390                     }
    7391 
    7392                     case kIemNativeVarKind_GstRegRef:
    7393                         off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
    7394                                                              pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
    7395                                                              pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
    7396                         off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
    7397                         continue;
    7398 
    7399                     case kIemNativeVarKind_Invalid:
    7400                     case kIemNativeVarKind_End:
    7401                         break;
    7402                 }
    7403                 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
    7404             }
    7405         }
    7406 # if 0 //def VBOX_STRICT
    7407         iemNativeRegAssertSanity(pReNative);
    7408 # endif
    7409     }
    7410 #else
    7411     AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
    7412 #endif
    7413 
    7414     /*
    7415      * Make sure the argument variables are loaded into their respective registers.
    7416      *
    7417      * We can optimize this by ASSUMING that any register allocations are for
    7418      * registers that have already been loaded and are ready.  The previous step
    7419      * saw to that.
    7420      */
    7421     if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
    7422     {
    7423         for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
    7424         {
    7425             uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
    7426             if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
    7427                 Assert(   pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
    7428                        && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
    7429                        && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
    7430             else
    7431             {
    7432                 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
    7433                 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    7434                 {
    7435                     Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
    7436                     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
    7437                     pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
    7438                                               | RT_BIT_32(idxArgReg);
    7439                     pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
    7440                 }
    7441                 else
    7442                 {
    7443                     /* Use ARG0 as temp for stuff we need registers for. */
    7444                     switch (pReNative->Core.aVars[idxVar].enmKind)
    7445                     {
    7446                         case kIemNativeVarKind_Stack:
    7447                         {
    7448                             uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
    7449                             AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    7450                             off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
    7451                             continue;
    7452                         }
    7453 
    7454                         case kIemNativeVarKind_Immediate:
    7455                             off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
    7456                             continue;
    7457 
    7458                         case kIemNativeVarKind_VarRef:
    7459                         {
    7460                             uint8_t const idxOtherVar    = pReNative->Core.aVars[idxVar].u.idxRefVar;
    7461                             Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
    7462                             uint8_t const idxStackSlot   = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
    7463                             int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
    7464                             uint8_t const idxRegOther    = pReNative->Core.aVars[idxOtherVar].idxReg;
    7465                             if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
    7466                             {
    7467                                 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
    7468                                 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
    7469                                 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
    7470                             }
    7471                             Assert(   pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
    7472                                    && pReNative->Core.aVars[idxOtherVar].idxReg       == UINT8_MAX);
    7473                             off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
    7474                             continue;
    7475                         }
    7476 
    7477                         case kIemNativeVarKind_GstRegRef:
    7478                             off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
    7479                                                                  pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
    7480                                                                  pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
    7481                             continue;
    7482 
    7483                         case kIemNativeVarKind_Invalid:
    7484                         case kIemNativeVarKind_End:
    7485                             break;
    7486                     }
    7487                     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
    7488                 }
    7489             }
    7490         }
    7491 #if 0 //def VBOX_STRICT
    7492         iemNativeRegAssertSanity(pReNative);
    7493 #endif
    7494     }
    7495 #ifdef VBOX_STRICT
    7496     else
    7497         for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
    7498         {
    7499             Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
    7500             Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
    7501         }
    7502 #endif
    7503 
    7504     /*
    7505      * Free all argument variables (simplified).
    7506      * Their lifetime always expires with the call they are for.
    7507      */
    7508     /** @todo Make the python script check that arguments aren't used after
    7509      *        IEM_MC_CALL_XXXX. */
    7510     /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
    7511      *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
    7512      *        an argument value.  There is also some FPU stuff. */
    7513     for (uint32_t i = cHiddenArgs; i < cArgs; i++)
    7514     {
    7515         uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
    7516         Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
    7517 
    7518         /* no need to free registers: */
    7519         AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
    7520                   ?    pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
    7521                     || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
    7522                   : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
    7523                   ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
    7524                    i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
    7525 
    7526         pReNative->Core.aidxArgVars[i] = UINT8_MAX;
    7527         pReNative->Core.bmVars        &= ~RT_BIT_32(idxVar);
    7528         iemNativeVarFreeStackSlots(pReNative, idxVar);
    7529     }
    7530     Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
    7531 
    7532     /*
    7533      * Flush volatile registers as we make the call.
    7534      */
    7535     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
    7536 
    7537     return off;
    7538 }
    7539 
    7540 
    7541 /** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
    7542 DECL_HIDDEN_THROW(uint32_t)
    7543 iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
    7544                              uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
    7545 
    7546 {
    7547     /*
    7548      * Do all the call setup and cleanup.
    7549      */
    7550     off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
    7551 
    7552     /*
    7553      * Load the two or three hidden arguments.
    7554      */
    7555 #if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
    7556     off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
    7557     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    7558     off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
    7559 #else
    7560     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    7561     off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
    7562 #endif
    7563 
    7564     /*
    7565      * Make the call and check the return code.
    7566      *
    7567      * Shadow PC copies are always flushed here, other stuff depends on flags.
    7568      * Segment and general purpose registers are explicitly flushed via the
    7569      * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
    7570      * macros.
    7571      */
    7572     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
    7573 #if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
    7574     off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
    7575 #endif
    7576     fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
    7577     if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls.  */
    7578         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
    7579     iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
    7580 
    7581     return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
    7582 }
    7583 
    7584 
    7585 #define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
    7586     off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
    7587 
    7588 /** Emits code for IEM_MC_CALL_CIMPL_1. */
    7589 DECL_INLINE_THROW(uint32_t)
    7590 iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    7591                         uintptr_t pfnCImpl, uint8_t idxArg0)
    7592 {
    7593     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    7594     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
    7595 }
    7596 
    7597 
    7598 #define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
    7599     off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
    7600 
    7601 /** Emits code for IEM_MC_CALL_CIMPL_2. */
    7602 DECL_INLINE_THROW(uint32_t)
    7603 iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    7604                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
    7605 {
    7606     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    7607     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    7608     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
    7609 }
    7610 
    7611 
    7612 #define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
    7613     off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
    7614                                   (uintptr_t)a_pfnCImpl, a0, a1, a2)
    7615 
    7616 /** Emits code for IEM_MC_CALL_CIMPL_3. */
    7617 DECL_INLINE_THROW(uint32_t)
    7618 iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    7619                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
    7620 {
    7621     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    7622     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    7623     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
    7624     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
    7625 }
    7626 
    7627 
    7628 #define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
    7629     off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
    7630                                   (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
    7631 
    7632 /** Emits code for IEM_MC_CALL_CIMPL_4. */
    7633 DECL_INLINE_THROW(uint32_t)
    7634 iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    7635                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
    7636 {
    7637     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    7638     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    7639     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
    7640     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
    7641     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
    7642 }
    7643 
    7644 
    7645 #define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
    7646     off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
    7647                                   (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
    7648 
    7649 /** Emits code for IEM_MC_CALL_CIMPL_5. */
    7650 DECL_INLINE_THROW(uint32_t)
    7651 iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    7652                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
    7653 {
    7654     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    7655     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    7656     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
    7657     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
    7658     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
    7659     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
    7660 }
    7661 
    7662 
    7663 /** Recompiler debugging: Flush guest register shadow copies. */
    7664 #define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
    7665 
    7666 
    7667 
    7668 /*********************************************************************************************************************************
    7669 *   Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX                                                            *
    7670 *********************************************************************************************************************************/
    7671 
    7672 /**
    7673  * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
    7674  */
    7675 DECL_INLINE_THROW(uint32_t)
    7676 iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    7677                              uintptr_t pfnAImpl, uint8_t cArgs)
    7678 {
    7679     if (idxVarRc != UINT8_MAX)
    7680     {
    7681         IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
    7682         AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
    7683         AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
    7684     }
    7685 
    7686     /*
    7687      * Do all the call setup and cleanup.
    7688      */
    7689     off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
    7690 
    7691     /*
    7692      * Make the call and update the return code variable if we've got one.
    7693      */
    7694     off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
    7695     if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
    7696     {
    7697 pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
    7698         iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
    7699     }
    7700 
    7701     return off;
    7702 }
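
         /*
          * A minimal usage sketch, with a made-up worker and variable names purely for
          * illustration: a threaded function body containing
          *
          *      IEM_MC_CALL_AIMPL_2(rc, iemAImpl_example_worker, arg0, arg1)
          *
          * is recompiled via the IEM_MC_CALL_AIMPL_2 define below into
          *
          *      off = iemNativeEmitCallAImpl2(pReNative, off, rc,
          *                                    (uintptr_t)iemAImpl_example_worker, arg0, arg1);
          *
          * which asserts that arg0/arg1 occupy argument slots 0 and 1 and then has
          * iemNativeEmitCallAImplCommon do the call setup, emit the call and bind
          * rc to IEMNATIVE_CALL_RET_GREG.
          */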
    7703 
    7704 
    7705 
    7706 #define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
    7707     off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
    7708 
    7709 #define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
    7710     off = iemNativeEmitCallAImpl0(pReNative, off, a_rc,                   (uintptr_t)(a_pfn))
    7711 
    7712 /** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
    7713 DECL_INLINE_THROW(uint32_t)
    7714 iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
    7715 {
    7716     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
    7717 }
    7718 
    7719 
    7720 #define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
    7721     off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
    7722 
    7723 #define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
    7724     off = iemNativeEmitCallAImpl1(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0)
    7725 
    7726 /** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
    7727 DECL_INLINE_THROW(uint32_t)
    7728 iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
    7729 {
    7730     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    7731     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
    7732 }
    7733 
    7734 
    7735 #define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
    7736     off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
    7737 
    7738 #define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
    7739     off = iemNativeEmitCallAImpl2(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0, a1)
    7740 
    7741 /** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
    7742 DECL_INLINE_THROW(uint32_t)
    7743 iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    7744                         uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
    7745 {
    7746     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    7747     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
    7748     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
    7749 }
    7750 
    7751 
    7752 #define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
    7753     off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
    7754 
    7755 #define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
    7756     off = iemNativeEmitCallAImpl3(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0, a1, a2)
    7757 
    7758 /** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
    7759 DECL_INLINE_THROW(uint32_t)
    7760 iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    7761                         uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
    7762 {
    7763     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    7764     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
    7765     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
    7766     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
    7767 }
    7768 
    7769 
    7770 #define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
    7771     off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
    7772 
    7773 #define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
    7774     off = iemNativeEmitCallAImpl4(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0, a1, a2, a3)
    7775 
    7776 /** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
    7777 DECL_INLINE_THROW(uint32_t)
    7778 iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    7779                         uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
    7780 {
    7781     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    7782     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
    7783     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
    7784     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
    7785     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
    7786 }
    7787 
    7788 
    7789 
    7790 /*********************************************************************************************************************************
    7791 *   Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX).                                                       *
    7792 *********************************************************************************************************************************/
    7793 
    7794 #define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
    7795     off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst,  a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
    7796 
    7797 #define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
    7798     off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
    7799 
    7800 #define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
    7801     off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
    7802 
    7803 #define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
    7804     off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
    7805 
    7806 
    7807 /** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
    7808  *  IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
    7809 DECL_INLINE_THROW(uint32_t)
    7810 iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
    7811 {
    7812     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    7813     Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
    7814     Assert(iGRegEx < 20);
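             /* iGRegEx values 0..15 address the low byte of the corresponding GPR, while
                16..19 address the high byte (AH/CH/DH/BH) of the first four GPRs - hence
                the & 15 masking and the Gpr8Hi load below. */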
    7815 
    7816     /* Same discussion as in iemNativeEmitFetchGregU16 */
    7817     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    7818                                                                   kIemNativeGstRegUse_ReadOnly);
    7819 
    7820     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    7821     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    7822 
    7823     /* The value is zero-extended to the full 64-bit host register width. */
    7824     if (iGRegEx < 16)
    7825         off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    7826     else
    7827         off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
    7828 
    7829     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    7830     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    7831     return off;
    7832 }
    7833 
    7834 
    7835 #define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
    7836     off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
    7837 
    7838 #define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
    7839     off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
    7840 
    7841 #define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
    7842     off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
    7843 
    7844 /** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
    7845 DECL_INLINE_THROW(uint32_t)
    7846 iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
    7847 {
    7848     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    7849     Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
    7850     Assert(iGRegEx < 20);
    7851 
    7852     /* Same discussion as in iemNativeEmitFetchGregU16 */
    7853     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    7854                                                                   kIemNativeGstRegUse_ReadOnly);
    7855 
    7856     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    7857     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    7858 
    7859     if (iGRegEx < 16)
    7860     {
    7861         switch (cbSignExtended)
    7862         {
    7863             case sizeof(uint16_t):
    7864                 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    7865                 break;
    7866             case sizeof(uint32_t):
    7867                 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    7868                 break;
    7869             case sizeof(uint64_t):
    7870                 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    7871                 break;
    7872             default: AssertFailed(); break;
    7873         }
    7874     }
    7875     else
    7876     {
    7877         off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
    7878         switch (cbSignExtended)
    7879         {
    7880             case sizeof(uint16_t):
    7881                 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
    7882                 break;
    7883             case sizeof(uint32_t):
    7884                 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
    7885                 break;
    7886             case sizeof(uint64_t):
    7887                 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
    7888                 break;
    7889             default: AssertFailed(); break;
    7890         }
    7891     }
    7892 
    7893     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    7894     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    7895     return off;
    7896 }
    7897 
    7898 
    7899 
    7900 #define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
    7901     off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
    7902 
    7903 #define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
    7904     off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
    7905 
    7906 #define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
    7907     off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
    7908 
    7909 /** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
    7910 DECL_INLINE_THROW(uint32_t)
    7911 iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
    7912 {
    7913     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    7914     Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
    7915     Assert(iGReg < 16);
    7916 
    7917     /*
    7918      * We can either just load the low 16 bits of the GPR into a host register
    7919      * for the variable, or we can do so via a shadow copy host register. The
    7920      * latter will avoid having to reload it if it's being stored later, but
    7921      * will waste a host register if it isn't touched again.  Since we don't
    7922      * know what's going to happen, we choose the latter for now.
    7923      */
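             /* For example, if the same translation block later stores back to this guest
                register (a fetch-modify-store pattern), the ForUpdate allocation in the
                store emitter will find the register already shadowed in a host register
                and can skip the CPUMCTX reload. */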
    7924     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    7925                                                                   kIemNativeGstRegUse_ReadOnly);
    7926 
    7927     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    7928     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    7929     off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
    7930     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    7931 
    7932     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    7933     return off;
    7934 }
    7935 
    7936 
    7937 #define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
    7938     off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
    7939 
    7940 #define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
    7941     off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
    7942 
    7943 /** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
    7944 DECL_INLINE_THROW(uint32_t)
    7945 iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
    7946 {
    7947     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    7948     Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
    7949     Assert(iGReg < 16);
    7950 
    7951     /*
    7952      * We can either just load the low 16 bits of the GPR into a host register
    7953      * for the variable, or we can do so via a shadow copy host register. The
    7954      * latter will avoid having to reload it if it's being stored later, but
    7955      * will waste a host register if it isn't touched again.  Since we don't
    7956      * know what's going to happen, we choose the latter for now.
    7957      */
    7958     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    7959                                                                   kIemNativeGstRegUse_ReadOnly);
    7960 
    7961     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    7962     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    7963     if (cbSignExtended == sizeof(uint32_t))
    7964         off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
    7965     else
    7966     {
    7967         Assert(cbSignExtended == sizeof(uint64_t));
    7968         off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
    7969     }
    7970     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    7971 
    7972     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    7973     return off;
    7974 }
    7975 
    7976 
    7977 #define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
    7978     off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
    7979 
    7980 #define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
    7981     off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
    7982 
    7983 /** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
    7984 DECL_INLINE_THROW(uint32_t)
    7985 iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
    7986 {
    7987     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    7988     Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
    7989     Assert(iGReg < 16);
    7990 
    7991     /*
    7992      * We can either just load the low 32 bits of the GPR into a host register
    7993      * for the variable, or we can do so via a shadow copy host register. The
    7994      * latter will avoid having to reload it if it's being stored later, but
    7995      * will waste a host register if it isn't touched again.  Since we don't
    7996      * know what's going to happen, we choose the latter for now.
    7997      */
    7998     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    7999                                                                   kIemNativeGstRegUse_ReadOnly);
    8000 
    8001     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    8002     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    8003     off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
    8004     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    8005 
    8006     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    8007     return off;
    8008 }
    8009 
    8010 
    8011 #define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
    8012     off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
    8013 
    8014 /** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
    8015 DECL_INLINE_THROW(uint32_t)
    8016 iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
    8017 {
    8018     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    8019     Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
    8020     Assert(iGReg < 16);
    8021 
    8022     /*
    8023      * We can either just load the low 32 bits of the GPR into a host register
    8024      * for the variable, or we can do so via a shadow copy host register. The
    8025      * latter will avoid having to reload it if it's being stored later, but
    8026      * will waste a host register if it isn't touched again.  Since we don't
    8027      * know what's going to happen, we choose the latter for now.
    8028      */
    8029     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8030                                                                   kIemNativeGstRegUse_ReadOnly);
    8031 
    8032     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    8033     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    8034     off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
    8035     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    8036 
    8037     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    8038     return off;
    8039 }
    8040 
    8041 
    8042 #define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
    8043     off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
    8044 
    8045 #define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
    8046     off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
    8047 
    8048 /** Emits code for IEM_MC_FETCH_GREG_U64 (and the
    8049  *  IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
    8050 DECL_INLINE_THROW(uint32_t)
    8051 iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
    8052 {
    8053     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    8054     Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
    8055     Assert(iGReg < 16);
    8056 
    8057     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8058                                                                   kIemNativeGstRegUse_ReadOnly);
    8059 
    8060     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    8061     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    8062     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
    8063     /** @todo name the register a shadow one already? */
    8064     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    8065 
    8066     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    8067     return off;
    8068 }
    8069 
    8070 
    8071 
    8072 /*********************************************************************************************************************************
    8073 *   Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX).                                                        *
    8074 *********************************************************************************************************************************/
    8075 
    8076 #define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
    8077     off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
    8078 
    8079 /** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
    8080 DECL_INLINE_THROW(uint32_t)
    8081 iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
    8082 {
    8083     Assert(iGRegEx < 20);
    8084     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    8085                                                                  kIemNativeGstRegUse_ForUpdate);
    8086 #ifdef RT_ARCH_AMD64
    8087     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
    8088 
    8089     /* To the lowest byte of the register: mov r8, imm8 */
    8090     if (iGRegEx < 16)
    8091     {
    8092         if (idxGstTmpReg >= 8)
    8093             pbCodeBuf[off++] = X86_OP_REX_B;
    8094         else if (idxGstTmpReg >= 4)
    8095             pbCodeBuf[off++] = X86_OP_REX;
    8096         pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
    8097         pbCodeBuf[off++] = u8Value;
    8098     }
    8099     /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
    8100     else if (idxGstTmpReg < 4)
    8101     {
    8102         pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
    8103         pbCodeBuf[off++] = u8Value;
    8104     }
    8105     else
    8106     {
    8107         /* ror reg64, 8 */
    8108         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    8109         pbCodeBuf[off++] = 0xc1;
    8110         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    8111         pbCodeBuf[off++] = 8;
    8112 
    8113         /* mov reg8, imm8  */
    8114         if (idxGstTmpReg >= 8)
    8115             pbCodeBuf[off++] = X86_OP_REX_B;
    8116         else if (idxGstTmpReg >= 4)
    8117             pbCodeBuf[off++] = X86_OP_REX;
    8118         pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
    8119         pbCodeBuf[off++] = u8Value;
    8120 
    8121         /* rol reg64, 8 */
    8122         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    8123         pbCodeBuf[off++] = 0xc1;
    8124         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    8125         pbCodeBuf[off++] = 8;
    8126     }
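             /* Illustration (hypothetical register assignment): storing to a guest high-byte
                register whose full value is shadowed in host r10 emits
                    ror r10, 8 ; mov r10b, imm8 ; rol r10, 8
                since r10 has no directly addressable second-byte form. */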
    8127 
    8128 #elif defined(RT_ARCH_ARM64)
    8129     uint8_t const    idxImmReg   = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
    8130     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    8131     if (iGRegEx < 16)
    8132         /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
    8133         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
    8134     else
    8135         /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
    8136         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
    8137     iemNativeRegFreeTmp(pReNative, idxImmReg);
    8138 
    8139 #else
    8140 # error "Port me!"
    8141 #endif
    8142 
    8143     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    8144 
    8145     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
    8146 
    8147     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    8148     return off;
    8149 }
    8150 
    8151 
    8152 #define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
    8153     off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
    8154 
    8155 /** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
    8156 DECL_INLINE_THROW(uint32_t)
    8157 iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
    8158 {
    8159     Assert(iGRegEx < 20);
    8160     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    8161 
    8162     /*
    8163      * If it's a constant value (unlikely), we treat this as an
    8164      * IEM_MC_STORE_GREG_U8_CONST statement.
    8165      */
    8166     if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
    8167     { /* likely */ }
    8168     else
    8169     {
    8170         AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
    8171                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    8172         return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
    8173     }
    8174 
    8175     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    8176                                                                  kIemNativeGstRegUse_ForUpdate);
    8177     uint8_t const    idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
    8178 
    8179 #ifdef RT_ARCH_AMD64
    8180     /* To the lowest byte of the register: mov reg8, reg8(r/m) */
    8181     if (iGRegEx < 16)
    8182     {
    8183         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    8184         if (idxGstTmpReg >= 8 || idxVarReg >= 8)
    8185             pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
    8186         else if (idxGstTmpReg >= 4)
    8187             pbCodeBuf[off++] = X86_OP_REX;
    8188         pbCodeBuf[off++] = 0x8a;
    8189         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
    8190     }
    8191     /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
    8192     else if (idxGstTmpReg < 4 && idxVarReg < 4)
    8193     {
    8194         /** @todo test this.   */
    8195         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
    8196         pbCodeBuf[off++] = 0x8a;
    8197         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
    8198     }
    8199     else
    8200     {
    8201         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
    8202 
    8203         /* ror reg64, 8 */
    8204         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    8205         pbCodeBuf[off++] = 0xc1;
    8206         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    8207         pbCodeBuf[off++] = 8;
    8208 
    8209         /* mov reg8, reg8(r/m)  */
    8210         if (idxGstTmpReg >= 8 || idxVarReg >= 8)
    8211             pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
    8212         else if (idxGstTmpReg >= 4)
    8213             pbCodeBuf[off++] = X86_OP_REX;
    8214         pbCodeBuf[off++] = 0x8a;
    8215         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
    8216 
    8217         /* rol reg64, 8 */
    8218         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    8219         pbCodeBuf[off++] = 0xc1;
    8220         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    8221         pbCodeBuf[off++] = 8;
    8222     }
    8223 
    8224 #elif defined(RT_ARCH_ARM64)
    8225     /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
    8226             or
    8227        bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
    8228     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    8229     if (iGRegEx < 16)
    8230         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
    8231     else
    8232         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
    8233 
    8234 #else
    8235 # error "Port me!"
    8236 #endif
    8237     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    8238 
    8239     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    8240 
    8241     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
    8242     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    8243     return off;
    8244 }
    8245 
    8246 
    8247 
    8248 #define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
    8249     off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
    8250 
    8251 /** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
    8252 DECL_INLINE_THROW(uint32_t)
    8253 iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
    8254 {
    8255     Assert(iGReg < 16);
    8256     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8257                                                                  kIemNativeGstRegUse_ForUpdate);
    8258 #ifdef RT_ARCH_AMD64
    8259     /* mov reg16, imm16 */
    8260     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    8261     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    8262     if (idxGstTmpReg >= 8)
    8263         pbCodeBuf[off++] = X86_OP_REX_B;
    8264     pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
    8265     pbCodeBuf[off++] = RT_BYTE1(uValue);
    8266     pbCodeBuf[off++] = RT_BYTE2(uValue);
    8267 
    8268 #elif defined(RT_ARCH_ARM64)
    8269     /* movk xdst, #uValue, lsl #0 */
    8270     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    8271     pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
    8272 
    8273 #else
    8274 # error "Port me!"
    8275 #endif
    8276 
    8277     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    8278 
    8279     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    8280     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    8281     return off;
    8282 }
    8283 
    8284 
    8285 #define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
    8286     off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
    8287 
    8288 /** Emits code for IEM_MC_STORE_GREG_U16. */
    8289 DECL_INLINE_THROW(uint32_t)
    8290 iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
    8291 {
    8292     Assert(iGReg < 16);
    8293     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    8294 
    8295     /*
    8296      * If it's a constant value (unlikely), we treat this as an
    8297      * IEM_MC_STORE_GREG_U16_CONST statement.
    8298      */
    8299     if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
    8300     { /* likely */ }
    8301     else
    8302     {
    8303         AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
    8304                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    8305         return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
    8306     }
    8307 
    8308     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8309                                                                  kIemNativeGstRegUse_ForUpdate);
    8310 
    8311 #ifdef RT_ARCH_AMD64
    8312     /* mov reg16, reg16 or [mem16] */
    8313     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
    8314     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    8315     if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8316     {
    8317         if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
    8318             pbCodeBuf[off++] = (idxGstTmpReg >= 8                              ? X86_OP_REX_R : 0)
    8319                              | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
    8320         pbCodeBuf[off++] = 0x8b;
    8321         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
    8322     }
    8323     else
    8324     {
    8325         uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
    8326         AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    8327         if (idxGstTmpReg >= 8)
    8328             pbCodeBuf[off++] = X86_OP_REX_R;
    8329         pbCodeBuf[off++] = 0x8b;
    8330         off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
    8331     }
    8332 
    8333 #elif defined(RT_ARCH_ARM64)
    8334     /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
    8335     uint8_t const    idxVarReg   = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
    8336     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    8337     pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
    8338     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    8339 
    8340 #else
    8341 # error "Port me!"
    8342 #endif
    8343 
    8344     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    8345 
    8346     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    8347     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    8348     return off;
    8349 }
    8350 
    8351 
    8352 #define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
    8353     off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
    8354 
    8355 /** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
    8356 DECL_INLINE_THROW(uint32_t)
    8357 iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
    8358 {
    8359     Assert(iGReg < 16);
    8360     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8361                                                                  kIemNativeGstRegUse_ForFullWrite);
    8362     off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
    8363     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    8364     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    8365     return off;
    8366 }
    8367 
    8368 
    8369 #define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
    8370     off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
    8371 
    8372 /** Emits code for IEM_MC_STORE_GREG_U32. */
    8373 DECL_INLINE_THROW(uint32_t)
    8374 iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
    8375 {
    8376     Assert(iGReg < 16);
    8377     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    8378 
    8379     /*
    8380      * If it's a constant value (unlikely), we treat this as an
    8381      * IEM_MC_STORE_GREG_U32_CONST statement.
    8382      */
    8383     if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
    8384     { /* likely */ }
    8385     else
    8386     {
    8387         AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
    8388                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    8389         return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
    8390     }
    8391 
    8392     /*
    8393      * For the rest we allocate a guest register for the variable and write
    8394      * it to the CPUMCTX structure.
    8395      */
    8396     uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
    8397     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    8398 #ifdef VBOX_STRICT
    8399     off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
    8400 #endif
    8401     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    8402     return off;
    8403 }
    8404 
    8405 
    8406 #define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
    8407     off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
    8408 
    8409 /** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
    8410 DECL_INLINE_THROW(uint32_t)
    8411 iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
    8412 {
    8413     Assert(iGReg < 16);
    8414     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8415                                                                  kIemNativeGstRegUse_ForFullWrite);
    8416     off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
    8417     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    8418     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    8419     return off;
    8420 }
    8421 
    8422 
    8423 #define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
    8424     off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
    8425 
    8426 /** Emits code for IEM_MC_STORE_GREG_U64. */
    8427 DECL_INLINE_THROW(uint32_t)
    8428 iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
    8429 {
    8430     Assert(iGReg < 16);
    8431     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    8432 
    8433     /*
    8434      * If it's a constant value (unlikely), we treat this as an
    8435      * IEM_MC_STORE_GREG_U64_CONST statement.
    8436      */
    8437     if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
    8438     { /* likely */ }
    8439     else
    8440     {
    8441         AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
    8442                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    8443         return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pReNative->Core.aVars[idxValueVar].u.uValue);
    8444     }
    8445 
    8446     /*
    8447      * For the rest we allocate a guest register for the variable and write
    8448      * it to the CPUMCTX structure.
    8449      */
    8450     uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
    8451     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    8452     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    8453     return off;
    8454 }
    8455 
    8456 
    8457 #define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
    8458     off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
    8459 
    8460 /** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
    8461 DECL_INLINE_THROW(uint32_t)
    8462 iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
    8463 {
    8464     Assert(iGReg < 16);
    8465     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8466                                                                  kIemNativeGstRegUse_ForUpdate);
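             /* A 32-bit register-to-itself move zero-extends to 64 bits on both AMD64 and
                ARM64 hosts, so this single load clears bits 63:32 of the guest register. */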
    8467     off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
    8468     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    8469     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    8470     return off;
    8471 }
    8472 
    8473 
    8474 /*********************************************************************************************************************************
    8475 *   General purpose register manipulation (add, sub).                                                                            *
    8476 *********************************************************************************************************************************/
    8477 
    8478 #define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
    8479     off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
    8480 
    8481 /** Emits code for IEM_MC_ADD_GREG_U16. */
    8482 DECL_INLINE_THROW(uint32_t)
    8483 iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
    8484 {
    8485     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8486                                                                  kIemNativeGstRegUse_ForUpdate);
    8487 
    8488 #ifdef RT_ARCH_AMD64
    8489     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
    8490     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    8491     if (idxGstTmpReg >= 8)
    8492         pbCodeBuf[off++] = X86_OP_REX_B;
    8493     if (uAddend == 1)
    8494     {
    8495         pbCodeBuf[off++] = 0xff; /* inc */
    8496         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    8497     }
    8498     else
    8499     {
    8500         pbCodeBuf[off++] = 0x81;
    8501         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    8502         pbCodeBuf[off++] = uAddend;
    8503         pbCodeBuf[off++] = 0;
    8504     }
    8505 
    8506 #else
    8507     uint8_t const    idxTmpReg   = iemNativeRegAllocTmp(pReNative, &off);
    8508     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    8509 
    8510     /* add tmp, gstgrp, uAddend */
    8511     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
    8512 
    8513     /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into idxGstTmpReg. */
    8514     pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
    8515 
    8516     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    8517 #endif
    8518 
    8519     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    8520 
    8521     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    8522 
    8523     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    8524     return off;
    8525 }
    8526 
    8527 
    8528 #define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
    8529     off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
    8530 
    8531 #define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
    8532     off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
    8533 
    8534 /** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
    8535 DECL_INLINE_THROW(uint32_t)
    8536 iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
    8537 {
    8538     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8539                                                                  kIemNativeGstRegUse_ForUpdate);
    8540 
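             /* Unlike the 16-bit variant above, no merging is needed here: 32-bit GPR
                writes clear bits 63:32 (which matches the architectural behaviour of
                32-bit operations), and the 64-bit variant updates the whole register,
                so the addition can be done directly on the shadow register. */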
    8541 #ifdef RT_ARCH_AMD64
    8542     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    8543     if (f64Bit)
    8544         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
    8545     else if (idxGstTmpReg >= 8)
    8546         pbCodeBuf[off++] = X86_OP_REX_B;
    8547     if (uAddend == 1)
    8548     {
    8549         pbCodeBuf[off++] = 0xff; /* inc */
    8550         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    8551     }
    8552     else if (uAddend < 128)
    8553     {
    8554         pbCodeBuf[off++] = 0x83; /* add */
    8555         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    8556         pbCodeBuf[off++] = RT_BYTE1(uAddend);
    8557     }
    8558     else
    8559     {
    8560         pbCodeBuf[off++] = 0x81; /* add */
    8561         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    8562         pbCodeBuf[off++] = RT_BYTE1(uAddend);
    8563         pbCodeBuf[off++] = 0;
    8564         pbCodeBuf[off++] = 0;
    8565         pbCodeBuf[off++] = 0;
    8566     }
    8567 
    8568 #else
    8569     /* add gstgrp, gstgrp, uAddend */
    8570     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    8571     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
    8572 
    8573 #endif
    8574 
    8575     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    8576 
    8577     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    8578 
    8579     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    8580     return off;
    8581 }
    8582 
    8583 
    8584 
    8585 #define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
    8586     off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
    8587 
    8588 /** Emits code for IEM_MC_SUB_GREG_U16. */
    8589 DECL_INLINE_THROW(uint32_t)
    8590 iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
    8591 {
    8592     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8593                                                                  kIemNativeGstRegUse_ForUpdate);
    8594 
    8595 #ifdef RT_ARCH_AMD64
    8596     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
    8597     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    8598     if (idxGstTmpReg >= 8)
    8599         pbCodeBuf[off++] = X86_OP_REX_B;
    8600     if (uSubtrahend == 1)
    8601     {
    8602         pbCodeBuf[off++] = 0xff; /* dec */
    8603         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    8604     }
    8605     else
    8606     {
    8607         pbCodeBuf[off++] = 0x81;
    8608         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
    8609         pbCodeBuf[off++] = uSubtrahend;
    8610         pbCodeBuf[off++] = 0;
    8611     }
    8612 
    8613 #else
    8614     uint8_t const    idxTmpReg   = iemNativeRegAllocTmp(pReNative, &off);
    8615     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    8616 
    8617     /* sub tmp, gstgrp, uSubtrahend */
    8618     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
    8619 
    8620     /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into idxGstTmpReg. */
    8621     pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
    8622 
    8623     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    8624 #endif
    8625 
    8626     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    8627 
    8628     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    8629 
    8630     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    8631     return off;
    8632 }
    8633 
    8634 
    8635 #define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
    8636     off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
    8637 
    8638 #define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
    8639     off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
    8640 
    8641 /** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
    8642 DECL_INLINE_THROW(uint32_t)
    8643 iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
    8644 {
    8645     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8646                                                                  kIemNativeGstRegUse_ForUpdate);
    8647 
    8648 #ifdef RT_ARCH_AMD64
    8649     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    8650     if (f64Bit)
    8651         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
    8652     else if (idxGstTmpReg >= 8)
    8653         pbCodeBuf[off++] = X86_OP_REX_B;
    8654     if (uSubtrahend == 1)
    8655     {
    8656         pbCodeBuf[off++] = 0xff; /* dec */
    8657         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    8658     }
    8659     else if (uSubtrahend < 128)
    8660     {
    8661         pbCodeBuf[off++] = 0x83; /* sub */
    8662         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
    8663         pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
    8664     }
    8665     else
    8666     {
    8667         pbCodeBuf[off++] = 0x81; /* sub */
    8668         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
    8669         pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
    8670         pbCodeBuf[off++] = 0;
    8671         pbCodeBuf[off++] = 0;
    8672         pbCodeBuf[off++] = 0;
    8673     }
    8674 
    8675 #else
    8676     /* sub gstgrp, gstgrp, uSubtrahend */
    8677     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    8678     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
    8679 
    8680 #endif
    8681 
    8682     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    8683 
    8684     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    8685 
    8686     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    8687     return off;
    8688 }
    8689 
    8690 
    8691 
    8692 /*********************************************************************************************************************************
    8693 *   EFLAGS                                                                                                                       *
    8694 *********************************************************************************************************************************/
    8695 
    8696 #define IEM_MC_FETCH_EFLAGS(a_EFlags) \
    8697     off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags)
    8698 
    8699 /** Handles IEM_MC_FETCH_EFLAGS. */
    8700 DECL_INLINE_THROW(uint32_t)
    8701 iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
    8702 {
    8703     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
    8704     Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
    8705 
    8706     uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
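             /* Mark the variable's host register as the shadow copy of guest EFLAGS before
                loading the value from CPUMCTX, so later emitters can find EFLAGS already
                resident in that register. */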
    8707     iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
    8708     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
    8709     iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
    8710     return off;
    8711 }
    8712 
    8713 
    8714 #define IEM_MC_COMMIT_EFLAGS(a_EFlags) \
    8715     off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags)
    8716 
    8717 /** Handles IEM_MC_COMMIT_EFLAGS. */
    8718 DECL_INLINE_THROW(uint32_t)
    8719 iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
    8720 {
    8721     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
    8722     Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
    8723 
    8724     uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
    8725 
    8726 #ifdef VBOX_STRICT
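             /* Sanity checks on the value being committed: the reserved-always-one bit(s)
                in X86_EFL_RA1_MASK must be set and the reserved-always-zero bits within
                the hardware mask must be clear, otherwise the generated code hits a
                breakpoint (0x2001 / 0x2002 respectively). */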
    8727     off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
    8728     off = iemNativeEmitJnzToFixed(pReNative, off, 1);
    8729     off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
    8730 
    8731     off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
    8732     off = iemNativeEmitJzToFixed(pReNative, off, 1);
    8733     off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
    8734 #endif
    8735 
    8736     iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
    8737     off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
    8738     iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
    8739     return off;
    8740 }
    8741 
    8742 
    8743 
    8744 /*********************************************************************************************************************************
    8745 *   Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
    8746 *********************************************************************************************************************************/
    8747 
    8748 #define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
    8749     off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
    8750 
    8751 #define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
    8752     off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
    8753 
    8754 #define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
    8755     off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
    8756 
    8757 
    8758 /** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
    8759  *  IEM_MC_FETCH_SREG_ZX_U64. */
    8760 DECL_INLINE_THROW(uint32_t)
    8761 iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
    8762 {
    8763     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    8764     Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbVar); RT_NOREF(cbVar);
    8765     Assert(iSReg < X86_SREG_COUNT);
    8766 
    8767     /*
     8768      * For now, we will not create a shadow copy of a selector.  The rationale
     8769      * is that since we do not recompile the popping and loading of segment
     8770      * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for pushing
     8771      * and moving to registers, there is only a small chance that the shadow
     8772      * copy will be accessed again before the register is reloaded.  One
     8773      * scenario would be nested calls in 16-bit code, but I doubt it's worth
     8774      * the extra register pressure atm.
     8775      *
     8776      * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
     8777      * and iemNativeVarRegisterAcquire for a load scenario.  We've only got the
     8778      * store scenario covered at present (r160730).
    8779      */
    8780     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    8781     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    8782     off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
    8783     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    8784     return off;
    8785 }
    8786 
    8787 
    8788 
    8789 /*********************************************************************************************************************************
    8790 *   Register references.                                                                                                         *
    8791 *********************************************************************************************************************************/
    8792 
    8793 #define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
    8794     off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
    8795 
     8796 #define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
     8797     off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
    8798 
    8799 /** Handles IEM_MC_REF_GREG_U8[_CONST]. */
    8800 DECL_INLINE_THROW(uint32_t)
    8801 iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
    8802 {
    8803     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
    8804     Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
    8805     Assert(iGRegEx < 20);
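              /* Note: iGRegEx values 0..15 address the regular GPRs, while 16..19 refer to the
                 high byte registers (AH, CH, DH, BH), handled via the GprHighByte reference
                 kind below. */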
    8806 
    8807     if (iGRegEx < 16)
    8808         iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
    8809     else
    8810         iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
    8811 
    8812     /* If we've delayed writing back the register value, flush it now. */
    8813     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
    8814 
    8815     /* If it's not a const reference we need to flush the shadow copy of the register now. */
    8816     if (!fConst)
    8817         iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
    8818 
    8819     return off;
    8820 }
    8821 
    8822 #define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
    8823     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
    8824 
    8825 #define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
    8826     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
    8827 
    8828 #define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
    8829     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
    8830 
    8831 #define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
    8832     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
    8833 
    8834 #define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
    8835     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
    8836 
    8837 #define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
    8838     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
    8839 
    8840 #define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
    8841     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
    8842 
    8843 #define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
    8844     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
    8845 
    8846 #define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
    8847     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
    8848 
    8849 #define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
    8850     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
    8851 
    8852 /** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
    8853 DECL_INLINE_THROW(uint32_t)
    8854 iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
    8855 {
    8856     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
    8857     Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
    8858     Assert(iGReg < 16);
    8859 
    8860     iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
    8861 
    8862     /* If we've delayed writing back the register value, flush it now. */
    8863     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
    8864 
    8865     /* If it's not a const reference we need to flush the shadow copy of the register now. */
    8866     if (!fConst)
    8867         iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
    8868 
    8869     return off;
    8870 }
    8871 
    8872 
    8873 #define IEM_MC_REF_EFLAGS(a_pEFlags) \
    8874     off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
    8875 
    8876 /** Handles IEM_MC_REF_EFLAGS. */
    8877 DECL_INLINE_THROW(uint32_t)
    8878 iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
    8879 {
    8880     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
    8881     Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
    8882 
    8883     iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
    8884 
    8885     /* If we've delayed writing back the register value, flush it now. */
    8886     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
    8887 
    8888     /* If there is a shadow copy of guest EFLAGS, flush it now. */
    8889     iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
    8890 
    8891     return off;
    8892 }
    8893 
    8894 
    8895 /*********************************************************************************************************************************
    8896 *   Effective Address Calculation                                                                                                *
    8897 *********************************************************************************************************************************/
    8898 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
    8899     off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
    8900 
    8901 /** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
    8902  * @sa iemOpHlpCalcRmEffAddrThreadedAddr16  */
    8903 DECL_INLINE_THROW(uint32_t)
    8904 iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    8905                                          uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
    8906 {
    8907     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
    8908 
    8909     /*
    8910      * Handle the disp16 form with no registers first.
    8911      *
    8912      * Convert to an immediate value, as that'll delay the register allocation
    8913      * and assignment till the memory access / call / whatever and we can use
    8914      * a more appropriate register (or none at all).
    8915      */
    8916     if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
    8917     {
    8918         iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
    8919         return off;
    8920     }
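              /* Hypothetical example: a ModRM byte with mod=0, rm=6 (bRm=0x06) and u16Disp=0x1234
                 addresses [1234h] directly, so a_GCPtrEff simply becomes the constant 0x1234 and
                 no code is emitted here. */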
    8921 
     8922     /* Determine the displacement. */
    8923     uint16_t u16EffAddr;
    8924     switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    8925     {
    8926         case 0:  u16EffAddr = 0;                        break;
    8927         case 1:  u16EffAddr = (int16_t)(int8_t)u16Disp; break;
    8928         case 2:  u16EffAddr = u16Disp;                  break;
    8929         default: AssertFailedStmt(u16EffAddr = 0);
    8930     }
    8931 
    8932     /* Determine the registers involved. */
    8933     uint8_t idxGstRegBase;
    8934     uint8_t idxGstRegIndex;
    8935     switch (bRm & X86_MODRM_RM_MASK)
    8936     {
    8937         case 0:
    8938             idxGstRegBase  = X86_GREG_xBX;
    8939             idxGstRegIndex = X86_GREG_xSI;
    8940             break;
    8941         case 1:
    8942             idxGstRegBase  = X86_GREG_xBX;
    8943             idxGstRegIndex = X86_GREG_xDI;
    8944             break;
    8945         case 2:
    8946             idxGstRegBase  = X86_GREG_xBP;
    8947             idxGstRegIndex = X86_GREG_xSI;
    8948             break;
    8949         case 3:
    8950             idxGstRegBase  = X86_GREG_xBP;
    8951             idxGstRegIndex = X86_GREG_xDI;
    8952             break;
    8953         case 4:
    8954             idxGstRegBase  = X86_GREG_xSI;
    8955             idxGstRegIndex = UINT8_MAX;
    8956             break;
    8957         case 5:
    8958             idxGstRegBase  = X86_GREG_xDI;
    8959             idxGstRegIndex = UINT8_MAX;
    8960             break;
    8961         case 6:
    8962             idxGstRegBase  = X86_GREG_xBP;
    8963             idxGstRegIndex = UINT8_MAX;
    8964             break;
    8965 #ifdef _MSC_VER  /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
    8966         default:
    8967 #endif
    8968         case 7:
    8969             idxGstRegBase  = X86_GREG_xBX;
    8970             idxGstRegIndex = UINT8_MAX;
    8971             break;
    8972     }
    8973 
    8974     /*
    8975      * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
    8976      */
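              /* The (uint16_t) cast matters for 16-bit wrap-around: e.g. BX=0xFFF0, SI=0x0020,
                 disp=0 yields 0x10010, which must be truncated to 0x0010.  That is why the
                 AMD64 path below ends with iemNativeEmitClear16UpGpr and the ARM64 path with
                 an UXTH. */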
    8977     uint8_t const idxRegRet   = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    8978     uint8_t const idxRegBase  = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
    8979                                                                kIemNativeGstRegUse_ReadOnly);
    8980     uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
    8981                               ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
    8982                                                                kIemNativeGstRegUse_ReadOnly)
    8983                               : UINT8_MAX;
    8984 #ifdef RT_ARCH_AMD64
    8985     if (idxRegIndex == UINT8_MAX)
    8986     {
    8987         if (u16EffAddr == 0)
    8988         {
     8989             /* movzx ret, base */
    8990             off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
    8991         }
    8992         else
    8993         {
    8994             /* lea ret32, [base64 + disp32] */
    8995             Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
    8996             uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    8997             if (idxRegRet >= 8 || idxRegBase >= 8)
    8998                 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
    8999             pbCodeBuf[off++] = 0x8d;
    9000             if (idxRegBase != X86_GREG_x12 /*SIB*/)
    9001                 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
    9002             else
    9003             {
    9004                 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
    9005                 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
    9006             }
    9007             pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
    9008             pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
    9009             pbCodeBuf[off++] = 0;
    9010             pbCodeBuf[off++] = 0;
    9011             IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9012 
    9013             off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
    9014         }
    9015     }
    9016     else
    9017     {
    9018         /* lea ret32, [index64 + base64 (+ disp32)] */
    9019         Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
    9020         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    9021         if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
    9022             pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    9023                              | (idxRegBase  >= 8 ? X86_OP_REX_B : 0)
    9024                              | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
    9025         pbCodeBuf[off++] = 0x8d;
    9026         uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
    9027         pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    9028         pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
    9029         if (bMod == X86_MOD_MEM4)
    9030         {
    9031             pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
    9032             pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
    9033             pbCodeBuf[off++] = 0;
    9034             pbCodeBuf[off++] = 0;
    9035         }
    9036         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9037         off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
    9038     }
    9039 
    9040 #elif defined(RT_ARCH_ARM64)
    9041     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    9042     if (u16EffAddr == 0)
    9043     {
    9044         if (idxRegIndex == UINT8_MAX)
    9045             pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
    9046         else
    9047         {
    9048             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
    9049             pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
    9050         }
    9051     }
    9052     else
    9053     {
    9054         if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
    9055             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
    9056         else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
    9057             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
    9058                                                              (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
    9059         else
    9060         {
    9061             pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
    9062             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
    9063         }
    9064         if (idxRegIndex != UINT8_MAX)
    9065             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
    9066         pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
    9067     }
    9068 
    9069 #else
    9070 # error "port me"
    9071 #endif
    9072 
    9073     if (idxRegIndex != UINT8_MAX)
    9074         iemNativeRegFreeTmp(pReNative, idxRegIndex);
    9075     iemNativeRegFreeTmp(pReNative, idxRegBase);
    9076     iemNativeVarRegisterRelease(pReNative, idxVarRet);
    9077     return off;
    9078 }
    9079 
    9080 
    9081 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
    9082     off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
    9083 
    9084 /** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
    9085  * @see iemOpHlpCalcRmEffAddrThreadedAddr32  */
    9086 DECL_INLINE_THROW(uint32_t)
    9087 iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    9088                                          uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
    9089 {
    9090     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
    9091 
    9092     /*
    9093      * Handle the disp32 form with no registers first.
    9094      *
    9095      * Convert to an immediate value, as that'll delay the register allocation
    9096      * and assignment till the memory access / call / whatever and we can use
    9097      * a more appropriate register (or none at all).
    9098      */
    9099     if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
    9100     {
    9101         iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
    9102         return off;
    9103     }
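              /* Example (hypothetical): bRm=0x05 with u32Disp=0x00401000 is a bare [disp32]
                 operand in 32-bit addressing, so a_GCPtrEff becomes the constant 0x00401000.
                 (In 64-bit mode the same mod=0/rm=5 encoding is RIP-relative instead and is
                 handled by the Addr64 emitter below.) */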
    9104 
     9105     /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
    9106     uint32_t u32EffAddr = 0;
    9107     switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    9108     {
    9109         case 0: break;
    9110         case 1: u32EffAddr = (int8_t)u32Disp; break;
    9111         case 2: u32EffAddr = u32Disp; break;
    9112         default: AssertFailed();
    9113     }
    9114 
    9115     /* Get the register (or SIB) value. */
    9116     uint8_t idxGstRegBase  = UINT8_MAX;
    9117     uint8_t idxGstRegIndex = UINT8_MAX;
    9118     uint8_t cShiftIndex    = 0;
    9119     switch (bRm & X86_MODRM_RM_MASK)
    9120     {
    9121         case 0: idxGstRegBase = X86_GREG_xAX; break;
    9122         case 1: idxGstRegBase = X86_GREG_xCX; break;
    9123         case 2: idxGstRegBase = X86_GREG_xDX; break;
    9124         case 3: idxGstRegBase = X86_GREG_xBX; break;
    9125         case 4: /* SIB */
    9126         {
     9127             /* index w/ scaling. */
    9128             cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
    9129             switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
    9130             {
    9131                 case 0: idxGstRegIndex = X86_GREG_xAX; break;
    9132                 case 1: idxGstRegIndex = X86_GREG_xCX; break;
    9133                 case 2: idxGstRegIndex = X86_GREG_xDX; break;
    9134                 case 3: idxGstRegIndex = X86_GREG_xBX; break;
    9135                 case 4: cShiftIndex    = 0; /*no index*/ break;
    9136                 case 5: idxGstRegIndex = X86_GREG_xBP; break;
    9137                 case 6: idxGstRegIndex = X86_GREG_xSI; break;
    9138                 case 7: idxGstRegIndex = X86_GREG_xDI; break;
    9139             }
    9140 
    9141             /* base */
    9142             switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
    9143             {
    9144                 case 0: idxGstRegBase = X86_GREG_xAX; break;
    9145                 case 1: idxGstRegBase = X86_GREG_xCX; break;
    9146                 case 2: idxGstRegBase = X86_GREG_xDX; break;
    9147                 case 3: idxGstRegBase = X86_GREG_xBX; break;
    9148                 case 4:
    9149                     idxGstRegBase     = X86_GREG_xSP;
    9150                     u32EffAddr       += uSibAndRspOffset >> 8;
    9151                     break;
    9152                 case 5:
    9153                     if ((bRm & X86_MODRM_MOD_MASK) != 0)
    9154                         idxGstRegBase = X86_GREG_xBP;
    9155                     else
    9156                     {
    9157                         Assert(u32EffAddr == 0);
    9158                         u32EffAddr    = u32Disp;
    9159                     }
    9160                     break;
    9161                 case 6: idxGstRegBase = X86_GREG_xSI; break;
    9162                 case 7: idxGstRegBase = X86_GREG_xDI; break;
    9163             }
    9164             break;
    9165         }
    9166         case 5: idxGstRegBase = X86_GREG_xBP; break;
    9167         case 6: idxGstRegBase = X86_GREG_xSI; break;
    9168         case 7: idxGstRegBase = X86_GREG_xDI; break;
    9169     }
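              /* Worked example (hypothetical): [eax + ecx*4 + 8] arrives here as bRm=0x44 (mod=1,
                 rm=4/SIB), SIB byte 0x88 (scale=2, index=ecx, base=eax) in uSibAndRspOffset and
                 u32Disp=8, decoding to u32EffAddr=8, cShiftIndex=2, idxGstRegIndex=xCX and
                 idxGstRegBase=xAX. */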
    9170 
    9171     /*
     9172      * If no registers are involved (SIB.B=5, SIB.X=4), repeat what we did at
    9173      * the start of the function.
    9174      */
    9175     if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
    9176     {
    9177         iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
    9178         return off;
    9179     }
    9180 
    9181     /*
    9182      * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
    9183      */
    9184     uint8_t const idxRegRet   = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    9185     uint8_t       idxRegBase  = idxGstRegBase == UINT8_MAX ? UINT8_MAX
    9186                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
    9187                                                                 kIemNativeGstRegUse_ReadOnly);
    9188     uint8_t       idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
    9189                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
    9190                                                                kIemNativeGstRegUse_ReadOnly);
    9191 
    9192     /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
    9193     if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
    9194     {
    9195         idxRegBase  = idxRegIndex;
    9196         idxRegIndex = UINT8_MAX;
    9197     }
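              /* The swap is only valid when there is no scaling: an unshifted index contributes
                 exactly like a base register, so the base-only code paths below can be reused. */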
    9198 
    9199 #ifdef RT_ARCH_AMD64
    9200     if (idxRegIndex == UINT8_MAX)
    9201     {
    9202         if (u32EffAddr == 0)
    9203         {
    9204             /* mov ret, base */
    9205             off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
    9206         }
    9207         else
    9208         {
    9209             /* lea ret32, [base64 + disp32] */
    9210             Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
    9211             uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    9212             if (idxRegRet >= 8 || idxRegBase >= 8)
    9213                 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
    9214             pbCodeBuf[off++] = 0x8d;
    9215             uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
    9216             if (idxRegBase != X86_GREG_x12 /*SIB*/)
    9217                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
    9218             else
    9219             {
    9220                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    9221                 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
    9222             }
    9223             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    9224             if (bMod == X86_MOD_MEM4)
    9225             {
    9226                 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    9227                 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    9228                 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    9229             }
    9230             IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9231         }
    9232     }
    9233     else
    9234     {
    9235         Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
    9236         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    9237         if (idxRegBase == UINT8_MAX)
    9238         {
    9239             /* lea ret32, [(index64 << cShiftIndex) + disp32] */
    9240             if (idxRegRet >= 8 || idxRegIndex >= 8)
    9241                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    9242                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
    9243             pbCodeBuf[off++] = 0x8d;
    9244             pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
    9245             pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
    9246             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    9247             pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    9248             pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    9249             pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    9250         }
    9251         else
    9252         {
    9253             /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
    9254             if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
    9255                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    9256                                  | (idxRegBase  >= 8 ? X86_OP_REX_B : 0)
    9257                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
    9258             pbCodeBuf[off++] = 0x8d;
    9259             uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
    9260                                : (int8_t)u32EffAddr == (int32_t)u32EffAddr           ? X86_MOD_MEM1 : X86_MOD_MEM4;
    9261             pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    9262             pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
    9263             if (bMod != X86_MOD_MEM0)
    9264             {
    9265                 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    9266                 if (bMod == X86_MOD_MEM4)
    9267                 {
    9268                     pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    9269                     pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    9270                     pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    9271                 }
    9272             }
    9273         }
    9274         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9275     }
    9276 
    9277 #elif defined(RT_ARCH_ARM64)
    9278     if (u32EffAddr == 0)
    9279     {
    9280         if (idxRegIndex == UINT8_MAX)
    9281             off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
    9282         else if (idxRegBase == UINT8_MAX)
    9283         {
    9284             if (cShiftIndex == 0)
    9285                 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
    9286             else
    9287             {
    9288                 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9289                 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
    9290             }
    9291         }
    9292         else
    9293         {
    9294             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9295             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
    9296                                                           false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
    9297         }
    9298     }
    9299     else
    9300     {
    9301         if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
    9302         {
    9303             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9304             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
    9305         }
    9306         else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
    9307         {
    9308             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9309             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
    9310                                                              (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
    9311         }
    9312         else
    9313         {
    9314             off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
    9315             if (idxRegBase != UINT8_MAX)
    9316             {
    9317                 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9318                 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
    9319             }
    9320         }
    9321         if (idxRegIndex != UINT8_MAX)
    9322         {
    9323             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9324             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
    9325                                                           false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
    9326         }
    9327     }
    9328 
    9329 #else
    9330 # error "port me"
    9331 #endif
    9332 
    9333     if (idxRegIndex != UINT8_MAX)
    9334         iemNativeRegFreeTmp(pReNative, idxRegIndex);
    9335     if (idxRegBase != UINT8_MAX)
    9336         iemNativeRegFreeTmp(pReNative, idxRegBase);
    9337     iemNativeVarRegisterRelease(pReNative, idxVarRet);
    9338     return off;
    9339 }
    9340 
    9341 
    9342 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
    9343     off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
    9344                                                    a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
    9345 
    9346 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
    9347     off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
    9348                                                    a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
    9349 
    9350 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
    9351     off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
    9352                                                    a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
    9353 
    9354 /**
    9355  * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
    9356  *
    9357  * @returns New off.
     9358  * @param   pReNative           The native recompile state.
     9359  * @param   off                 The current code buffer offset.
    9360  * @param   bRmEx               The ModRM byte but with bit 3 set to REX.B and
    9361  *                              bit 4 to REX.X.  The two bits are part of the
    9362  *                              REG sub-field, which isn't needed in this
    9363  *                              function.
    9364  * @param   uSibAndRspOffset    Two parts:
    9365  *                                - The first 8 bits make up the SIB byte.
    9366  *                                - The next 8 bits are the fixed RSP/ESP offset
    9367  *                                  in case of a pop [xSP].
    9368  * @param   u32Disp             The displacement byte/word/dword, if any.
    9369  * @param   cbInstr             The size of the fully decoded instruction. Used
    9370  *                              for RIP relative addressing.
    9371  * @param   idxVarRet           The result variable number.
    9372  * @param   f64Bit              Whether to use a 64-bit or 32-bit address size
    9373  *                              when calculating the address.
    9374  *
    9375  * @see iemOpHlpCalcRmEffAddrThreadedAddr64
    9376  */
    9377 DECL_INLINE_THROW(uint32_t)
    9378 iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
    9379                                          uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
    9380 {
    9381     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
    9382 
    9383     /*
    9384      * Special case the rip + disp32 form first.
    9385      */
    9386     if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
    9387     {
    9388         uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    9389         uint8_t const idxRegPc  = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
    9390                                                                   kIemNativeGstRegUse_ReadOnly);
    9391 #ifdef RT_ARCH_AMD64
    9392         if (f64Bit)
    9393         {
    9394             int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
    9395             if ((int32_t)offFinalDisp == offFinalDisp)
    9396                 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
    9397             else
    9398             {
    9399                 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
    9400                 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
    9401             }
    9402         }
    9403         else
    9404             off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
    9405 
    9406 #elif defined(RT_ARCH_ARM64)
    9407         if (f64Bit)
    9408             off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
    9409                                                                  (int64_t)(int32_t)u32Disp + cbInstr);
    9410         else
    9411             off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
    9412                                                                    (int32_t)u32Disp + cbInstr);
    9413 
    9414 #else
    9415 # error "Port me!"
    9416 #endif
    9417         iemNativeRegFreeTmp(pReNative, idxRegPc);
    9418         iemNativeVarRegisterRelease(pReNative, idxVarRet);
    9419         return off;
    9420     }
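              /* Note: the guest PC register still refers to the start of the current instruction
                 here, so the code above adds disp32 + cbInstr to it, matching RIP-relative
                 semantics where the displacement is relative to the next instruction. */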
    9421 
     9422     /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
    9423     int64_t i64EffAddr = 0;
    9424     switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    9425     {
    9426         case 0: break;
    9427         case 1: i64EffAddr = (int8_t)u32Disp; break;
    9428         case 2: i64EffAddr = (int32_t)u32Disp; break;
    9429         default: AssertFailed();
    9430     }
    9431 
    9432     /* Get the register (or SIB) value. */
    9433     uint8_t idxGstRegBase  = UINT8_MAX;
    9434     uint8_t idxGstRegIndex = UINT8_MAX;
    9435     uint8_t cShiftIndex    = 0;
    9436     if ((bRmEx & X86_MODRM_RM_MASK) != 4)
    9437         idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
    9438     else /* SIB: */
    9439     {
     9440         /* index w/ scaling. */
    9441         cShiftIndex    = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
    9442         idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
    9443                        | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
    9444         if (idxGstRegIndex == 4)
    9445         {
    9446             /* no index */
    9447             cShiftIndex    = 0;
    9448             idxGstRegIndex = UINT8_MAX;
    9449         }
    9450 
    9451         /* base */
    9452         idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
    9453         if (idxGstRegBase == 4)
    9454         {
    9455             /* pop [rsp] hack */
    9456             i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
    9457         }
    9458         else if (   (idxGstRegBase & X86_SIB_BASE_MASK) == 5
    9459                  && (bRmEx & X86_MODRM_MOD_MASK) == 0)
    9460         {
    9461             /* mod=0 and base=5 -> disp32, no base reg. */
    9462             Assert(i64EffAddr == 0);
    9463             i64EffAddr    = (int32_t)u32Disp;
    9464             idxGstRegBase = UINT8_MAX;
    9465         }
    9466     }
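              /* Worked example (hypothetical): mov rax, [r12 + r9*8 + 10h] arrives here with
                 bRmEx=0x5C (mod=1, rm=4, REX.B in bit 3, REX.X in bit 4), SIB byte 0xCC and
                 u32Disp=0x10, decoding to cShiftIndex=3, idxGstRegIndex=9 (r9),
                 idxGstRegBase=12 (r12) and i64EffAddr=0x10.  Note that the pop [rsp] hack
                 above only triggers for plain RSP (base value 4), not for R12. */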
    9467 
    9468     /*
     9469      * If no registers are involved (SIB.B=5, SIB.X=4), repeat what we did at
    9470      * the start of the function.
    9471      */
    9472     if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
    9473     {
    9474         if (f64Bit)
    9475             iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
    9476         else
    9477             iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
    9478         return off;
    9479     }
    9480 
    9481     /*
    9482      * Now emit code that calculates:
    9483      *      idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
    9484      * or if !f64Bit:
    9485      *      idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
    9486      */
    9487     uint8_t const idxRegRet   = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    9488     uint8_t       idxRegBase  = idxGstRegBase == UINT8_MAX ? UINT8_MAX
    9489                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
    9490                                                                 kIemNativeGstRegUse_ReadOnly);
    9491     uint8_t       idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
    9492                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
    9493                                                                kIemNativeGstRegUse_ReadOnly);
    9494 
    9495     /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
    9496     if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
    9497     {
    9498         idxRegBase  = idxRegIndex;
    9499         idxRegIndex = UINT8_MAX;
    9500     }
    9501 
    9502 #ifdef RT_ARCH_AMD64
    9503     uint8_t bFinalAdj;
    9504     if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
    9505         bFinalAdj = 0; /* likely */
    9506     else
    9507     {
    9508         /* pop [rsp] with a problematic disp32 value.  Split out the
    9509            RSP offset and add it separately afterwards (bFinalAdj). */
    9510         /** @todo testcase: pop [rsp] with problematic disp32 (mod4).   */
    9511         Assert(idxGstRegBase == X86_GREG_xSP);
    9512         Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
    9513         bFinalAdj   = (uint8_t)(uSibAndRspOffset >> 8);
    9514         Assert(bFinalAdj != 0);
    9515         i64EffAddr -= bFinalAdj;
    9516         Assert((int32_t)i64EffAddr == i64EffAddr);
    9517     }
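              /* Example of the problematic case: pop qword [rsp + 7FFFFFFFh] has an RSP offset of 8,
                 so i64EffAddr = 0x80000007 no longer fits in a disp32; bFinalAdj=8 restores
                 i64EffAddr to 0x7FFFFFFF for the LEA and the 8 is added back afterwards. */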
    9518     uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
    9519 //pReNative->pInstrBuf[off++] = 0xcc;
    9520 
    9521     if (idxRegIndex == UINT8_MAX)
    9522     {
    9523         if (u32EffAddr == 0)
    9524         {
    9525             /* mov ret, base */
    9526             if (f64Bit)
    9527                 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
    9528             else
    9529                 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
    9530         }
    9531         else
    9532         {
    9533             /* lea ret, [base + disp32] */
    9534             Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
    9535             uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    9536             if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
    9537                 pbCodeBuf[off++] = (idxRegRet  >= 8 ? X86_OP_REX_R : 0)
    9538                                  | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
    9539                                  | (f64Bit          ? X86_OP_REX_W : 0);
    9540             pbCodeBuf[off++] = 0x8d;
    9541             uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
    9542             if (idxRegBase != X86_GREG_x12 /*SIB*/)
    9543                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
    9544             else
    9545             {
    9546                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    9547                 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
    9548             }
    9549             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    9550             if (bMod == X86_MOD_MEM4)
    9551             {
    9552                 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    9553                 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    9554                 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    9555             }
    9556             IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9557         }
    9558     }
    9559     else
    9560     {
    9561         Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
    9562         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    9563         if (idxRegBase == UINT8_MAX)
    9564         {
    9565             /* lea ret, [(index64 << cShiftIndex) + disp32] */
    9566             if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
    9567                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    9568                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
    9569                                  | (f64Bit           ? X86_OP_REX_W : 0);
    9570             pbCodeBuf[off++] = 0x8d;
    9571             pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
    9572             pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
    9573             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    9574             pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    9575             pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    9576             pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    9577         }
    9578         else
    9579         {
    9580             /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
    9581             if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
    9582                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    9583                                  | (idxRegBase  >= 8 ? X86_OP_REX_B : 0)
    9584                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
    9585                                  | (f64Bit           ? X86_OP_REX_W : 0);
    9586             pbCodeBuf[off++] = 0x8d;
    9587             uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
    9588                                : (int8_t)u32EffAddr == (int32_t)u32EffAddr           ? X86_MOD_MEM1 : X86_MOD_MEM4;
    9589             pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    9590             pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
    9591             if (bMod != X86_MOD_MEM0)
    9592             {
    9593                 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    9594                 if (bMod == X86_MOD_MEM4)
    9595                 {
    9596                     pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    9597                     pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    9598                     pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    9599                 }
    9600             }
    9601         }
    9602         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9603     }
    9604 
    9605     if (!bFinalAdj)
    9606     { /* likely */ }
    9607     else
    9608     {
    9609         Assert(f64Bit);
    9610         off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
    9611     }
    9612 
    9613 #elif defined(RT_ARCH_ARM64)
    9614     if (i64EffAddr == 0)
    9615     {
    9616         uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9617         if (idxRegIndex == UINT8_MAX)
    9618             pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
    9619         else if (idxRegBase != UINT8_MAX)
    9620             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
    9621                                                           f64Bit, false /*fSetFlags*/, cShiftIndex);
    9622         else
    9623         {
    9624             Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
    9625             pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
    9626         }
    9627     }
    9628     else
    9629     {
    9630         if (f64Bit)
    9631         { /* likely */ }
    9632         else
    9633             i64EffAddr = (int32_t)i64EffAddr;
    9634 
    9635         if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
    9636         {
    9637             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9638             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
    9639         }
    9640         else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
    9641         {
    9642             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9643             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
    9644         }
    9645         else
    9646         {
    9647             if (f64Bit)
    9648                 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
    9649             else
    9650                 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
    9651             if (idxRegBase != UINT8_MAX)
    9652             {
    9653                 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9654                 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
    9655             }
    9656         }
    9657         if (idxRegIndex != UINT8_MAX)
    9658         {
    9659             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9660             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
    9661                                                           f64Bit, false /*fSetFlags*/, cShiftIndex);
    9662         }
    9663     }
    9664 
    9665 #else
    9666 # error "port me"
    9667 #endif
    9668 
    9669     if (idxRegIndex != UINT8_MAX)
    9670         iemNativeRegFreeTmp(pReNative, idxRegIndex);
    9671     if (idxRegBase != UINT8_MAX)
    9672         iemNativeRegFreeTmp(pReNative, idxRegBase);
    9673     iemNativeVarRegisterRelease(pReNative, idxVarRet);
    9674     return off;
    9675 }
    9676 
    9677 
    9678 
    9679 
    9680 /*********************************************************************************************************************************
    9681 *   Memory fetches and stores common                                                                                             *
    9682 *********************************************************************************************************************************/
    9683 
    9684 typedef enum IEMNATIVEMITMEMOP
    9685 {
    9686     kIemNativeEmitMemOp_Store = 0,
    9687     kIemNativeEmitMemOp_Fetch,
    9688     kIemNativeEmitMemOp_Fetch_Zx_U16,
    9689     kIemNativeEmitMemOp_Fetch_Zx_U32,
    9690     kIemNativeEmitMemOp_Fetch_Zx_U64,
    9691     kIemNativeEmitMemOp_Fetch_Sx_U16,
    9692     kIemNativeEmitMemOp_Fetch_Sx_U32,
    9693     kIemNativeEmitMemOp_Fetch_Sx_U64
    9694 } IEMNATIVEMITMEMOP;
    9695 
    9696 /** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
    9697  * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
    9698  * (with iSegReg = UINT8_MAX). */
    9699 DECL_INLINE_THROW(uint32_t)
    9700 iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off,  uint8_t idxVarValue, uint8_t iSegReg,
    9701                                      uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
    9702                                      uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
    9703 {
    9704     /*
    9705      * Assert sanity.
    9706      */
    9707     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
    9708     Assert(   enmOp != kIemNativeEmitMemOp_Store
    9709            || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
    9710            || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
    9711     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
    9712     AssertStmt(   pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
    9713                || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
    9714                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    9715     Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
    9716     Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
    9717     AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
    9718 #ifdef VBOX_STRICT
    9719     if (iSegReg == UINT8_MAX)
    9720     {
    9721         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    9722                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    9723                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    9724         switch (cbMem)
    9725         {
    9726             case 1:
    9727                 Assert(   pfnFunction
    9728                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
    9729                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    9730                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    9731                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    9732                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    9733                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
    9734                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
    9735                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
    9736                            : UINT64_C(0xc000b000a0009000) ));
    9737                 break;
    9738             case 2:
    9739                 Assert(   pfnFunction
    9740                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
    9741                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
    9742                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
    9743                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
    9744                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
    9745                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
    9746                            : UINT64_C(0xc000b000a0009000) ));
    9747                 break;
    9748             case 4:
    9749                 Assert(   pfnFunction
    9750                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
    9751                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
    9752                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
    9753                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
    9754                            : UINT64_C(0xc000b000a0009000) ));
    9755                 break;
    9756             case 8:
    9757                 Assert(    pfnFunction
    9758                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
    9759                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
    9760                            : UINT64_C(0xc000b000a0009000) ));
    9761                 break;
    9762         }
    9763     }
    9764     else
    9765     {
    9766         Assert(iSegReg < 6);
    9767         switch (cbMem)
    9768         {
    9769             case 1:
    9770                 Assert(   pfnFunction
    9771                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU8
    9772                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU8
    9773                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
    9774                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
    9775                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
    9776                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
    9777                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
    9778                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
    9779                            : UINT64_C(0xc000b000a0009000) ));
    9780                 break;
    9781             case 2:
    9782                 Assert(   pfnFunction
    9783                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU16
    9784                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU16
    9785                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
    9786                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
    9787                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
    9788                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
    9789                            : UINT64_C(0xc000b000a0009000) ));
    9790                 break;
    9791             case 4:
    9792                 Assert(   pfnFunction
    9793                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU32
    9794                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU32
    9795                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
    9796                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
    9797                            : UINT64_C(0xc000b000a0009000) ));
    9798                 break;
    9799             case 8:
    9800                 Assert(    pfnFunction
    9801                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU64
    9802                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU64
    9803                            : UINT64_C(0xc000b000a0009000) ));
    9804                 break;
    9805         }
    9806     }
    9807 #endif
    9808 
    9809 #ifdef VBOX_STRICT
    9810     /*
    9811      * Check that the fExec flags we've got make sense.
    9812      */
    9813     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    9814 #endif
    9815 
    9816     /*
    9817      * To keep things simple we have to commit any pending writes first as we
    9818      * may end up making calls.
    9819      */
    9820     /** @todo we could postpone this till we make the call and reload the
    9821      * registers after returning from the call. Not sure if that's sensible or
    9822      * not, though. */
    9823     off = iemNativeRegFlushPendingWrites(pReNative, off);
    9824 
    9825     /*
    9826      * Move/spill/flush stuff out of call-volatile registers.
    9827      * This is the easy way out. We could contain this to the tlb-miss branch
    9828      * by saving and restoring active stuff here.
    9829      */
    9830     /** @todo save+restore active registers and maybe guest shadows in tlb-miss.  */
    9831     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
    9832 
    9833     /*
    9834      * Define labels and allocate the result register (trying for the return
    9835      * register if we can).
    9836      */
    9837     uint16_t const uTlbSeqNo        = pReNative->uTlbSeqNo++;
    9838     uint32_t const idxLabelTlbMiss  = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
    9839     uint32_t const idxLabelTlbDone  = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    9840     uint8_t  const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX /* special case value storing below */
    9841                                     : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
    9842                                     ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
    9843                                     : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
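    /* Note: preferring IEMNATIVE_CALL_RET_GREG for the fetch result means the
       register-to-register move after the helper call below can usually be
       skipped. */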
    9844 
    9845     /*
    9846      * First we try to go via the TLB.
    9847      */
    9848 //pReNative->pInstrBuf[off++] = 0xcc;
    9849     /** @todo later. */
    9850     RT_NOREF(fAlignMask, cbMem);
    9851 
    9852     /*
     9853      * Call helper to do the fetching or storing (see the calling convention note after this function).
    9854      * We flush all guest register shadow copies here.
    9855      */
    9856     iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
    9857 
    9858 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    9859     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    9860 #else
    9861     RT_NOREF(idxInstr);
    9862 #endif
    9863 
    9864     uint8_t idxRegArgValue;
    9865     if (iSegReg == UINT8_MAX)
    9866         idxRegArgValue = IEMNATIVE_CALL_ARG2_GREG;
    9867     else
    9868     {
    9869         /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
    9870         AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
    9871         off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
    9872 
    9873         idxRegArgValue = IEMNATIVE_CALL_ARG3_GREG;
    9874     }
    9875 
    9876     /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
    9877     if (enmOp == kIemNativeEmitMemOp_Store)
    9878     {
    9879         if (pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate)
    9880             off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArgValue, pReNative->Core.aVars[idxVarValue].u.uValue);
    9881         else
    9882         {
    9883             uint8_t const idxRegVarValue = pReNative->Core.aVars[idxVarValue].idxReg;
    9884             if (idxRegVarValue < RT_ELEMENTS(pReNative->Core.aHstRegs))
    9885             {
    9886                 Assert(!(RT_BIT_32(idxRegVarValue) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
    9887                 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArgValue, idxRegVarValue);
    9888             }
    9889             else
    9890             {
    9891                 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarValue].idxStackSlot;
    9892                 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    9893                 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArgValue, iemNativeStackCalcBpDisp(idxStackSlot));
    9894             }
    9895         }
    9896     }
    9897 
    9898     /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
    9899     if (pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate)
    9900         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG,
    9901                                         pReNative->Core.aVars[idxVarGCPtrMem].u.uValue + offDisp);
    9902     else
    9903     {
    9904         uint8_t const idxRegVarGCPtrMem = pReNative->Core.aVars[idxVarGCPtrMem].idxReg;
    9905         if (idxRegVarGCPtrMem < RT_ELEMENTS(pReNative->Core.aHstRegs))
    9906         {
    9907             Assert(!(RT_BIT_32(idxRegVarGCPtrMem) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
    9908             if (!offDisp)
    9909                 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem);
    9910             else
    9911                 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegVarGCPtrMem, offDisp);
    9912         }
    9913         else
    9914         {
    9915             uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarGCPtrMem].idxStackSlot;
    9916             AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    9917             AssertFailed(); /** @todo This was probably caused by iemNativeRegMoveAndFreeAndFlushAtCall above. Improve... */
    9918             off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, iemNativeStackCalcBpDisp(idxStackSlot));
    9919             if (offDisp)
    9920                 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offDisp);
    9921         }
    9922     }
    9923 
    9924     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    9925     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    9926 
    9927     /* Done setting up parameters, make the call. */
    9928     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    9929 
    9930     /*
    9931      * Put the result in the right register if this is a fetch.
    9932      */
    9933     if (enmOp != kIemNativeEmitMemOp_Store)
    9934     {
    9935         Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
    9936         if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
    9937             off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
    9938         iemNativeVarRegisterRelease(pReNative, idxVarValue);
    9939     }
    9940 
    9941     iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    9942 
    9943     return off;
    9944 }
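/*
 * Note on the helper calling convention as it can be read off the argument
 * setup above: ARG0 = pVCpu, ARG1 = GCPtrMem (plus any displacement); for
 * segmented accesses ARG2 = iSegReg and the value to store goes in ARG3,
 * while flat accesses pass the value in ARG2.  A rough prototype sketch for
 * the segmented 16-bit fetch (assumed here for illustration, not the
 * authoritative declaration) would thus be:
 *
 *      uint64_t iemNativeHlpMemFetchDataU16(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg);
 *
 * with the result being picked up from IEMNATIVE_CALL_RET_GREG afterwards.
 */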
    9945 
    9946 
    9947 
    9948 /*********************************************************************************************************************************
    9949 *   Memory fetches (IEM_MEM_FETCH_XXX).                                                                                          *
    9950 *********************************************************************************************************************************/
    9951 
    9952 /* 8-bit segmented: */
    9953 #define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
    9954     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
    9955                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
    9956                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
    9957 
    9958 #define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
    9959     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    9960                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
    9961                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
    9962 
    9963 #define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    9964     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    9965                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    9966                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
    9967 
    9968 #define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    9969     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    9970                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    9971                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
    9972 
    9973 #define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
    9974     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    9975                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
    9976                                                (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
    9977 
    9978 #define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    9979     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    9980                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    9981                                                (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
    9982 
    9983 #define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    9984     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    9985                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    9986                                                (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
    9987 
    9988 /* 16-bit segmented: */
    9989 #define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
    9990     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    9991                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    9992                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
    9993 
    9994 #define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
    9995     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    9996                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    9997                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
    9998 
    9999 #define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    10000     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    10001                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    10002                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
    10003 
    10004 #define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    10005     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    10006                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    10007                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
    10008 
    10009 #define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    10010     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    10011                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    10012                                                (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
    10013 
    10014 #define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    10015     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    10016                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    10017                                                (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
    10018 
    10019 
    10020 /* 32-bit segmented: */
    10021 #define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    10022     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    10023                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    10024                                                (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
    10025 
    10026 #define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
    10027     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    10028                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    10029                                                (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
    10030 
    10031 #define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    10032     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    10033                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    10034                                                (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
    10035 
    10036 #define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    10037     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    10038                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    10039                                                (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
    10040 
    10041 
    10042 /* 64-bit segmented: */
    10043 #define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    10044     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    10045                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
    10046                                                (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
    10047 
    10048 
    10049 
    10050 /* 8-bit flat: */
    10051 #define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
    10052     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
    10053                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
    10054                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    10055 
    10056 #define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
    10057     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    10058                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
    10059                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    10060 
    10061 #define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
    10062     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    10063                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    10064                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    10065 
    10066 #define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
    10067     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    10068                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    10069                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    10070 
    10071 #define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
    10072     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    10073                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
    10074                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
    10075 
    10076 #define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
    10077     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    10078                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    10079                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
    10080 
    10081 #define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
    10082     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    10083                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    10084                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
    10085 
    10086 
    10087 /* 16-bit flat: */
    10088 #define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
    10089     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    10090                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    10091                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
    10092 
    10093 #define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
    10094     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    10095                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    10096                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
    10097 
    10098 #define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
    10099     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    10100                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    10101                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
    10102 
    10103 #define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
    10104     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    10105                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    10106                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
    10107 
    10108 #define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
    10109     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    10110                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    10111                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
    10112 
    10113 #define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
    10114     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    10115                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    10116                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
    10117 
    10118 /* 32-bit flat: */
    10119 #define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
    10120     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    10121                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    10122                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
    10123 
    10124 #define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
    10125     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    10126                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    10127                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
    10128 
    10129 #define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
    10130     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    10131                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    10132                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
    10133 
    10134 #define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
    10135     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    10136                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    10137                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
    10138 
    10139 /* 64-bit flat: */
    10140 #define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
    10141     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    10142                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
    10143                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
    10144 
    10145 
    10146 
    10147 /*********************************************************************************************************************************
    10148 *   Memory stores (IEM_MEM_STORE_XXX).                                                                                           *
    10149 *********************************************************************************************************************************/
    10150 
    10151 #define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
    10152     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
    10153                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
    10154                                                (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
    10155 
    10156 #define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
    10157     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
    10158                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
    10159                                                (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
    10160 
    10161 #define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
    10162     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
    10163                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
    10164                                                (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
    10165 
    10166 #define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
    10167     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
    10168                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
    10169                                                (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
    10170 
    10171 
    10172 #define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
    10173     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
    10174                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
    10175                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
    10176 
    10177 #define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
    10178     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
    10179                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
    10180                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
    10181 
    10182 #define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
    10183     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
    10184                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
    10185                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
    10186 
    10187 #define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
    10188     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
    10189                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
    10190                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
    10191 
    10192 
    10193 #define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
    10194     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    10195                                                (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
    10196 
    10197 #define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
    10198     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    10199                                                (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
    10200 
    10201 #define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
    10202     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    10203                                                (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
    10204 
    10205 #define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
    10206     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    10207                                                (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
    10208 
    10209 
    10210 #define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
    10211     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    10212                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
    10213 
    10214 #define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
    10215     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    10216                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
    10217 
    10218 #define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
    10219     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    10220                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
    10221 
    10222 #define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
    10223     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    10224                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
    10225 
    10226 /** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
    10227  *  IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
    10228 DECL_INLINE_THROW(uint32_t)
    10229 iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
    10230                                     uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
    10231 {
    10232     /*
    10233      * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
    10234      * to do the grunt work.
    10235      */
    10236     uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
    10237     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
    10238                                                cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
    10239                                                pfnFunction, idxInstr);
    10240     iemNativeVarFreeLocal(pReNative, idxVarConstValue);
    10241     return off;
    10242 }
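/* Note: the fAlignMask passed here (and by the IEM_MC_STORE_MEM_* macros above)
   follows the natural-alignment convention, i.e. cbMem - 1: 1 for 16-bit, 3 for
   32-bit, 7 for 64-bit accesses, with an address counting as aligned when
   (GCPtrMem & fAlignMask) == 0.  It is currently unused (see the RT_NOREF in
   the TLB placeholder of the common emitter), presumably awaiting the inline
   TLB lookup. */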
    10243 
    10244 
    10245 
    10246 /*********************************************************************************************************************************
    10247 *   Stack Accesses.                                                                                                              *
    10248 *********************************************************************************************************************************/
    10249 /*                                                     RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
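/* For reference: RT_MAKE_U32_FROM_U8(a, b, c, d) packs the bytes least
   significant first, i.e. a | (b << 8) | (c << 16) | (d << 24), so e.g.
   RT_MAKE_U32_FROM_U8(16, 32, 0, 0) == UINT32_C(0x00002010).  RT_BYTE1() thus
   recovers cBitsVar and RT_BYTE2() recovers cBitsFlat, which is what the
   VBOX_STRICT checks in the emitters below rely on. */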
    10250 #define IEM_MC_PUSH_U16(a_u16Value) \
    10251     off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16,  0, 0, 0), \
    10252                                  (uintptr_t)iemNativeHlpStackPushU16, pCallEntry->idxInstr)
    10253 #define IEM_MC_PUSH_U32(a_u32Value) \
    10254     off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32,  0, 0, 0), \
    10255                                  (uintptr_t)iemNativeHlpStackPushU32, pCallEntry->idxInstr)
    10256 #define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
    10257     off = iemNativeEmitStackPush(pReNative, off, a_uSegVal,  RT_MAKE_U32_FROM_U8(32,  0, 1, 0), \
    10258                                  (uintptr_t)iemNativeHlpStackPushU32SReg, pCallEntry->idxInstr)
    10259 #define IEM_MC_PUSH_U64(a_u64Value) \
    10260     off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64,  0, 0, 0), \
    10261                                  (uintptr_t)iemNativeHlpStackPushU64, pCallEntry->idxInstr)
    10262 
    10263 #define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
    10264     off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
    10265                                  (uintptr_t)iemNativeHlpStackFlat32PushU16, pCallEntry->idxInstr)
    10266 #define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
    10267     off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
    10268                                  (uintptr_t)iemNativeHlpStackFlat32PushU32, pCallEntry->idxInstr)
    10269 #define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
    10270     off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
    10271                                  (uintptr_t)iemNativeHlpStackFlat32PushU32SReg, pCallEntry->idxInstr)
    10272 
    10273 #define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
    10274     off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
    10275                                  (uintptr_t)iemNativeHlpStackFlat64PushU16, pCallEntry->idxInstr)
    10276 #define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
    10277     off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
    10278                                  (uintptr_t)iemNativeHlpStackFlat64PushU64, pCallEntry->idxInstr)
    10279 
    10280 /** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
    10281 DECL_INLINE_THROW(uint32_t)
    10282 iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
    10283                        uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
    10284 {
    10285     /*
    10286      * Assert sanity.
    10287      */
    10288     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
    10289 #ifdef VBOX_STRICT
    10290     if (RT_BYTE2(cBitsVarAndFlat) != 0)
    10291     {
    10292         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    10293                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    10294                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    10295         Assert(   pfnFunction
    10296                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU16
    10297                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU32
    10298                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU32SReg
    10299                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PushU16
    10300                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PushU64
    10301                    : UINT64_C(0xc000b000a0009000) ));
    10302     }
    10303     else
    10304         Assert(   pfnFunction
    10305                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU16
    10306                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU32
    10307                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackPushU32SReg
    10308                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPushU64
    10309                    : UINT64_C(0xc000b000a0009000) ));
    10310 #endif
    10311 
    10312 #ifdef VBOX_STRICT
    10313     /*
    10314      * Check that the fExec flags we've got make sense.
    10315      */
    10316     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    10317 #endif
    10318 
    10319     /*
    10320      * To keep things simple we have to commit any pending writes first as we
    10321      * may end up making calls.
    10322      */
    10323     /** @todo we could postpone this till we make the call and reload the
    10324      * registers after returning from the call. Not sure if that's sensible or
    10325      * not, though. */
    10326     off = iemNativeRegFlushPendingWrites(pReNative, off);
    10327 
    10328     /*
    10329      * Move/spill/flush stuff out of call-volatile registers, keeping whatever
    10330      * idxVarValue might be occupying.
    10331      *
    10332      * This is the easy way out. We could contain this to the tlb-miss branch
    10333      * by saving and restoring active stuff here.
    10334      */
    10335     /** @todo save+restore active registers and maybe guest shadows in tlb-miss.  */
    10336     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarValue));
    10337 
    10338     /* For now, flush any shadow copy of the xSP register. */
    10339     iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(X86_GREG_xSP)));
    10340 
    10341     /*
    10342      * Define labels and allocate the result register (trying for the return
    10343      * register if we can).
    10344      */
    10345     uint16_t const uTlbSeqNo        = pReNative->uTlbSeqNo++;
    10346     uint32_t const idxLabelTlbMiss  = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
    10347     uint32_t const idxLabelTlbDone  = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    10348 
    10349     /*
    10350      * First we try to go via the TLB.
    10351      */
    10352 //pReNative->pInstrBuf[off++] = 0xcc;
    10353     /** @todo later. */
    10354     RT_NOREF(cBitsVarAndFlat);
    10355 
    10356     /*
    10357      * Call helper to do the pushing.
    10358      */
    10359     iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
    10360 
    10361 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    10362     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    10363 #else
    10364     RT_NOREF(idxInstr);
    10365 #endif
    10366 
    10367     /* IEMNATIVE_CALL_ARG1_GREG = idxVarValue (first) */
    10368     off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarValue,
    10369                                                     0 /*offAddend*/, true /*fVarAllowInVolatileReg*/);
    10370 
    10371     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    10372     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    10373 
    10374     /* Done setting up parameters, make the call. */
    10375     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    10376 
    10377     /* The value variable is implicitly flushed. */
    10378     iemNativeVarFreeLocal(pReNative, idxVarValue);
    10379 
    10380     iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    10381 
    10382     return off;
    10383 }
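/*
 * The push path passes ARG0 = pVCpu and ARG1 = the value being pushed, so the
 * stack push helpers presumably look along these lines (an assumed sketch, not
 * the authoritative declaration):
 *
 *      void iemNativeHlpStackPushU16(PVMCPUCC pVCpu, uint16_t uValue);
 *
 * As such a helper updates xSP (and guest memory) behind the recompiler's
 * back, the xSP shadow copy is flushed before the call, as done above.
 */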
    10384 
    10385 
    10386 
    10387 /*                                                     RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
    10388 #define IEM_MC_POP_GREG_U16(a_iGReg) \
    10389     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16,  0, 0, 0), \
    10390                                     (uintptr_t)iemNativeHlpStackPopGRegU16, pCallEntry->idxInstr)
    10391 #define IEM_MC_POP_GREG_U32(a_iGReg) \
    10392     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32,  0, 0, 0), \
    10393                                     (uintptr_t)iemNativeHlpStackPopGRegU32, pCallEntry->idxInstr)
    10394 #define IEM_MC_POP_GREG_U64(a_iGReg) \
    10395     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64,  0, 0, 0), \
    10396                                     (uintptr_t)iemNativeHlpStackPopGRegU64, pCallEntry->idxInstr)
    10397 
    10398 #define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
    10399     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
    10400                                     (uintptr_t)iemNativeHlpStackFlat32PopGRegU16, pCallEntry->idxInstr)
    10401 #define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
    10402     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
    10403                                     (uintptr_t)iemNativeHlpStackFlat32PopGRegU32, pCallEntry->idxInstr)
    10404 
    10405 #define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
    10406     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
    10407                                     (uintptr_t)iemNativeHlpStackFlat64PopGRegU16, pCallEntry->idxInstr)
    10408 #define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
    10409     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
    10410                                     (uintptr_t)iemNativeHlpStackFlat64PopGRegU64, pCallEntry->idxInstr)
    10411 
    10412 /** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
    10413 DECL_INLINE_THROW(uint32_t)
    10414 iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
    10415                           uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
    10416 {
    10417     /*
    10418      * Assert sanity.
    10419      */
    10420     Assert(idxGReg < 16);
    10421 #ifdef VBOX_STRICT
    10422     if (RT_BYTE2(cBitsVarAndFlat) != 0)
    10423     {
    10424         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    10425                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    10426                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    10427         Assert(   pfnFunction
    10428                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PopGRegU16
    10429                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PopGRegU32
    10430                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PopGRegU16
    10431                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PopGRegU64
    10432                    : UINT64_C(0xc000b000a0009000) ));
    10433     }
    10434     else
    10435         Assert(   pfnFunction
    10436                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU16
    10437                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU32
    10438                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackPopGRegU64
    10439                    : UINT64_C(0xc000b000a0009000) ));
    10440 #endif
    10441 
    10442 #ifdef VBOX_STRICT
    10443     /*
    10444      * Check that the fExec flags we've got make sense.
    10445      */
    10446     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    10447 #endif
    10448 
    10449     /*
    10450      * To keep things simple we have to commit any pending writes first as we
    10451      * may end up making calls.
    10452      */
    10453     /** @todo we could postpone this till we make the call and reload the
    10454      * registers after returning from the call. Not sure if that's sensible or
    10455      * not, though. */
    10456     off = iemNativeRegFlushPendingWrites(pReNative, off);
    10457 
    10458     /*
    10459      * Move/spill/flush stuff out of call-volatile registers.
    10460      * This is the easy way out. We could contain this to the tlb-miss branch
    10461      * by saving and restoring active stuff here.
    10462      */
    10463     /** @todo save+restore active registers and maybe guest shadows in tlb-miss.  */
    10464     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
    10465 
    10466     /* For now, flush any shadow copy of the guest register that is about
    10467        to be popped and of the xSP register. */
    10468     iemNativeRegFlushGuestShadows(pReNative,
    10469                                   RT_BIT_64(IEMNATIVEGSTREG_GPR(idxGReg)) | RT_BIT_64(IEMNATIVEGSTREG_GPR(X86_GREG_xSP)));
    10470 
    10471     /*
    10472      * Define labels and allocate the result register (trying for the return
    10473      * register if we can).
    10474      */
    10475     uint16_t const uTlbSeqNo        = pReNative->uTlbSeqNo++;
    10476     uint32_t const idxLabelTlbMiss  = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
    10477     uint32_t const idxLabelTlbDone  = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    10478 
    10479     /*
    10480      * First we try to go via the TLB.
    10481      */
    10482 //pReNative->pInstrBuf[off++] = 0xcc;
    10483     /** @todo later. */
    10484     RT_NOREF(cBitsVarAndFlat);
    10485 
    10486     /*
    10487      * Call helper to do the popping.
    10488      */
    10489     iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
    10490 
    10491 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    10492     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    10493 #else
    10494     RT_NOREF(idxInstr);
    10495 #endif
    10496 
    10497     /* IEMNATIVE_CALL_ARG1_GREG = iGReg */
    10498     off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxGReg);
    10499 
    10500     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    10501     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    10502 
    10503     /* Done setting up parameters, make the call. */
    10504     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    10505 
    10506     iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    10507 
    10508     return off;
    10509 }
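/*
 * The pop path only passes ARG0 = pVCpu and ARG1 = the guest register index,
 * i.e. something along the lines of (assumed sketch):
 *
 *      void iemNativeHlpStackPopGRegU16(PVMCPUCC pVCpu, uint8_t iGReg);
 *
 * The helper is expected to write the popped value straight into the guest
 * register context and to adjust xSP, which is why the shadow copies of both
 * the target register and xSP are flushed before making the call.
 */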
    10510 
    10511 
    10512 
    10513 /*********************************************************************************************************************************
    10514 *   Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX).                                                                      *
    10515 *********************************************************************************************************************************/
    10516 
    10517 #define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10518     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    10519                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE,  0 /*fAlignMask*/, \
    10520                                     (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
    10521 
    10522 #define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10523     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    10524                                     IEM_ACCESS_TYPE_WRITE,  0 /*fAlignMask*/, \
    10525                                     (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
    10526 
    10527 #define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10528     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    10529                                     IEM_ACCESS_TYPE_READ,  0 /*fAlignMask*/, \
    10530                                     (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
    10531 
    10532 
    10533 #define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10534     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    10535                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    10536                                     (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
    10537 
    10538 #define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10539     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    10540                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    10541                                     (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
    10542 
    10543 #define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10544     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    10545                                     IEM_ACCESS_TYPE_READ,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    10546                                     (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
    10547 
    10548 #define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10549     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
    10550                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    10551                                     (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
    10552 
    10553 
    10554 #define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10555     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    10556                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    10557                                     (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
    10558 
    10559 #define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10560     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    10561                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    10562                                     (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
    10563 
    10564 #define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10565     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    10566                                     IEM_ACCESS_TYPE_READ,  sizeof(uint32_t) - 1 /*fAlignMask*/, \
    10567                                     (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
    10568 
    10569 #define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10570     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
    10571                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    10572                                     (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
    10573 
    10574 
    10575 #define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10576     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    10577                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    10578                                     (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
    10579 
    10580 #define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10581     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    10582                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    10583                                     (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
    10584 
    10585 #define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10586     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    10587                                     IEM_ACCESS_TYPE_READ,  sizeof(uint64_t) - 1 /*fAlignMask*/, \
    10588                                     (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
    10589 
    10590 #define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10591     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
    10592                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    10593                                     (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
    10594 
    10595 
    10596 #define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10597     off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
    10598                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    10599                                     (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
    10600 
    10601 #define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10602     off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
    10603                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
    10604                                     (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
    10605 
    10606 
    10607 #define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10608     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
    10609                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    10610                                     (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
    10611 
    10612 #define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10613     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
    10614                                     IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    10615                                     (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
    10616 
    10617 #define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    10618     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
    10619                                     IEM_ACCESS_TYPE_READ,  sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    10620                                     (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
    10621 
    10622 
    10623 
    10624 #define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    10625     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    10626                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE,  0 /*fAlignMask*/, \
    10627                                     (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
    10628 
    10629 #define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    10630     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    10631                                     IEM_ACCESS_TYPE_WRITE,  0 /*fAlignMask*/, \
    10632                                     (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
    10633 
    10634 #define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    10635     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    10636                                     IEM_ACCESS_TYPE_READ,  0 /*fAlignMask*/, \
    10637                                     (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
    10638 
    10639 
    10640 #define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    10641     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    10642                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    10643                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
    10644 
    10645 #define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    10646     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    10647                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    10648                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
    10649 
    10650 #define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    10651     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    10652                                     IEM_ACCESS_TYPE_READ,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    10653                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
    10654 
    10655 #define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
    10656     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
    10657                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    10658                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
    10659 
    10660 
    10661 #define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    10662     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    10663                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    10664                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
    10665 
    10666 #define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    10667     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    10668                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    10669                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
    10670 
    10671 #define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    10672     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    10673                                     IEM_ACCESS_TYPE_READ,  sizeof(uint32_t) - 1 /*fAlignMask*/, \
    10674                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
    10675 
    10676 #define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
    10677     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
    10678                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    10679                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
    10680 
    10681 
    10682 #define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    10683     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    10684                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    10685                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
    10686 
    10687 #define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    10688     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    10689                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    10690                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
    10691 
    10692 #define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    10693     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    10694                                     IEM_ACCESS_TYPE_READ,  sizeof(uint64_t) - 1 /*fAlignMask*/, \
    10695                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
    10696 
    10697 #define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
    10698     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
    10699                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    10700                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
    10701 
    10702 
    10703 #define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
    10704     off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
    10705                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    10706                                     (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
    10707 
    10708 #define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
    10709     off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
    10710                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
    10711                                     (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
    10712 
    10713 
    10714 #define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    10715     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
    10716                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    10717                                     (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
    10718 
    10719 #define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    10720     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
    10721                                     IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    10722                                     (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
    10723 
    10724 #define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    10725     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
    10726                                     IEM_ACCESS_TYPE_READ,  sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    10727                                     (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
    10728 
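All of the mapping variants above funnel into the same emitter, iemNativeEmitMemMapCommon, and differ only in the element size, access flags, alignment mask and TLB-miss helper they pass along (the signed and 80-bit write-only variants simply reuse the corresponding unsigned/64-bit helpers). Purely as an illustration of the intended pairing with the commit MC defined further down, a read-modify-write microcode fragment might look like this (made-up sketch, not taken from the instruction implementations):

    IEM_MC_LOCAL(uint8_t, bUnmapInfo);
    /* pu32Dst, iEffSeg and GCPtrEffDst are assumed to have been declared earlier in
       the block via the usual IEM_MC_ARG/IEM_MC_LOCAL statements. */
    IEM_MC_MEM_MAP_U32_RW(pu32Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
    /* ... modify the mapped dword through the appropriate IEM_MC_* statements ... */
    IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);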
    10729 
    10730 DECL_INLINE_THROW(uint32_t)
    10731 iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
    10732                           uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
    10733                           uintptr_t pfnFunction, uint8_t idxInstr)
    10734 {
    10735     /*
    10736      * Assert sanity.
    10737      */
    10738     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
    10739     AssertStmt(   pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
    10740                && pReNative->Core.aVars[idxVarMem].cbVar   == sizeof(void *),
    10741                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    10742 
    10743     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
    10744     AssertStmt(   pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
    10745                && pReNative->Core.aVars[idxVarUnmapInfo].cbVar   == sizeof(uint8_t),
    10746                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    10747 
    10748     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
    10749     AssertStmt(   pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
    10750                || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
    10751                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    10752 
    10753     Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
    10754 
    10755     AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
    10756 
    10757 #ifdef VBOX_STRICT
    10758 # define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
    10759         (  ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
    10760          ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
    10761          : ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_READ \
    10762          ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
    10763 
    10764     if (iSegReg == UINT8_MAX)
    10765     {
    10766         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    10767                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    10768                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    10769         switch (cbMem)
    10770         {
    10771             case 1:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
    10772             case 2:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
    10773             case 4:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
    10774             case 8:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
    10775             case 10:
    10776                 Assert(   pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
    10777                        || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
    10778                 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
    10779                 break;
    10780             case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
    10781 # if 0
    10782             case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
    10783             case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
    10784 # endif
    10785             default: AssertFailed(); break;
    10786         }
    10787     }
    10788     else
    10789     {
    10790         Assert(iSegReg < 6);
    10791         switch (cbMem)
    10792         {
    10793             case 1:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
    10794             case 2:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
    10795             case 4:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
    10796             case 8:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
    10797             case 10:
    10798                 Assert(   pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
    10799                        || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
    10800                 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
    10801                 break;
    10802             case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
    10803 # if 0
    10804             case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU256)); break;
    10805             case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU512)); break;
    10806 # endif
    10807             default: AssertFailed(); break;
    10808         }
    10809     }
    10810 # undef IEM_MAP_HLP_FN
    10811 #endif
    10812 
    10813 #ifdef VBOX_STRICT
    10814     /*
    10815      * Check that the fExec flags we've got make sense.
    10816      */
    10817     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    10818 #endif
    10819 
    10820     /*
    10821      * To keep things simple we have to commit any pending writes first as we
    10822      * may end up making calls.
    10823      */
    10824     /** @todo we could postpone this till we make the call and reload the
    10825      * registers after returning from the call. Not sure if that's sensible or
    10826      * not, though. */
    10827     off = iemNativeRegFlushPendingWrites(pReNative, off);
    10828 
    10829     /*
    10830      * Move/spill/flush stuff out of call-volatile registers.
    10831      * This is the easy way out. We could contain this to the tlb-miss branch
    10832      * by saving and restoring active stuff here.
    10833      */
    10834     /** @todo save+restore active registers and maybe guest shadows in tlb-miss.  */
    10835     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
    10836 
    10837     /*
    10838      * Define labels and allocate the result register (trying for the return
    10839      * register if we can - which we of course can, given the above call).
    10840      */
    10841     uint16_t const uTlbSeqNo        = pReNative->uTlbSeqNo++;
    10842     uint32_t const idxLabelTlbMiss  = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
    10843     uint32_t const idxLabelTlbDone  = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    10844     uint8_t  const idxRegMemResult  = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
    10845                                     ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
    10846                                     : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
    10847 
    10848     /*
    10849      * First we try to go via the TLB.
    10850      */
    10851 //pReNative->pInstrBuf[off++] = 0xcc;
    10852     /** @todo later. */
    10853     RT_NOREF(fAccess, fAlignMask, cbMem);
    10854 
    10855     /*
    10856      * Call helper to do the fetching.
    10857      * We flush all guest register shadow copies here.
    10858      */
    10859     iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
    10860 
    10861 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    10862     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    10863 #else
    10864     RT_NOREF(idxInstr);
    10865 #endif
    10866 
    10867     /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
    10868     if (iSegReg != UINT8_MAX)
    10869     {
    10870         AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
    10871         off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
    10872     }
    10873 
    10874     /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem */
    10875     off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem);
    10876 
    10877     /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo */
    10878     iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
    10879     off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo, true /*fFlushShadows*/);
    10880 
    10881     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    10882     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    10883 
    10884     /* Done setting up parameters, make the call. */
    10885     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    10886 
    10887     /*
     10888      * Put the result in the right register.
    10889      */
    10890     Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
    10891     if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
    10892         off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
    10893     iemNativeVarRegisterRelease(pReNative, idxVarMem);
    10894 
    10895     iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    10896 
    10897     return off;
    10898 }
    10899 
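In terms of what the generated code does at runtime, the argument registers loaded above amount to a helper call along these lines; the prototypes are an assumption inferred from the emitter, not quoted from the headers:

    /* Sketch only: ARG0=pVCpu, ARG1=&bUnmapInfo, ARG2=GCPtrMem, ARG3=iSegReg (segmented variants only),
       with the returned host pointer ending up in the result register bound to idxVarMem. */
    pu32Mem = iemNativeHlpMemMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem, iSegReg);
    pu32Mem = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem);    /* flat: iSegReg == UINT8_MAX */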
    10900 
    10901 #define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
    10902     off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, \
    10903                                          (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
    10904 
    10905 #define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
    10906     off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_WRITE, \
    10907                                          (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
    10908 
    10909 #define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
    10910     off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ, \
    10911                                          (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
    10912 
    10913 DECL_INLINE_THROW(uint32_t)
    10914 iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
    10915                                uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
    10916 {
    10917     /*
    10918      * Assert sanity.
    10919      */
    10920     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
    10921     Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
    10922     Assert(   pReNative->Core.aVars[idxVarUnmapInfo].idxReg       < RT_ELEMENTS(pReNative->Core.aHstRegs)
    10923            || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
    10924 #ifdef VBOX_STRICT
    10925     switch (fAccess & IEM_ACCESS_TYPE_MASK)
    10926     {
    10927         case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
    10928         case IEM_ACCESS_TYPE_WRITE:                        Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
    10929         case IEM_ACCESS_TYPE_READ:                         Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
    10930         default: AssertFailed();
    10931     }
    10932 #else
    10933     RT_NOREF(fAccess);
    10934 #endif
    10935 
    10936     /*
    10937      * To keep things simple we have to commit any pending writes first as we
    10938      * may end up making calls (there shouldn't be any at this point, so this
    10939      * is just for consistency).
    10940      */
    10941     /** @todo we could postpone this till we make the call and reload the
    10942      * registers after returning from the call. Not sure if that's sensible or
    10943      * not, though. */
    10944     off = iemNativeRegFlushPendingWrites(pReNative, off);
    10945 
    10946     /*
    10947      * Move/spill/flush stuff out of call-volatile registers.
    10948      *
    10949      * We exclude any register holding the bUnmapInfo variable, as we'll be
    10950      * checking it after returning from the call and will free it afterwards.
    10951      */
    10952     /** @todo save+restore active registers and maybe guest shadows in miss
    10953      *        scenario. */
    10954     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
    10955 
    10956     /*
     10957      * If the value in idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
    10958      * to call the unmap helper function.
    10959      *
     10960      * The likelihood of it being zero is higher than for the TLB hit when doing
     10961      * the mapping, as a TLB miss for a well aligned and unproblematic memory
    10962      * access should also end up with a mapping that won't need special unmapping.
    10963      */
    10964     /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case!  That
    10965      *        should speed up things for the pure interpreter as well when TLBs
    10966      *        are enabled. */
    10967 #ifdef RT_ARCH_AMD64
    10968     if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
    10969     {
    10970         /* test byte [rbp - xxx], 0ffh  */
    10971         uint8_t * const pbCodeBuf    = iemNativeInstrBufEnsure(pReNative, off, 7);
    10972         pbCodeBuf[off++] = 0xf6;
    10973         uint8_t const   idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
    10974         off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
    10975         pbCodeBuf[off++] = 0xff;
    10976         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    10977     }
    10978     else
    10979 #endif
    10980     {
    10981         uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
    10982                                                               true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
    10983         off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
    10984         iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
    10985     }
    10986     uint32_t const offJmpFixup = off;
    10987     off = iemNativeEmitJzToFixed(pReNative, off, 0);
    10988 
    10989     /*
    10990      * Call the unmap helper function.
    10991      */
    10992 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
    10993     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    10994 #else
    10995     RT_NOREF(idxInstr);
    10996 #endif
    10997 
    10998     /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
    10999     off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
    11000                                                0 /*offAddend*/, true /*fVarAllowInVolatileReg*/);
    11001 
    11002     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    11003     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    11004 
    11005     /* Done setting up parameters, make the call. */
    11006     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    11007 
     11008     /* The bUnmapInfo variable is implicitly freed by these MCs. */
    11009     iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
    11010 
    11011     /*
    11012      * Done, just fixup the jump for the non-call case.
    11013      */
    11014     iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
    11015 
    11016     return off;
    11017 }
    11018 
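At runtime the code emitted above is equivalent to the following conceptual sketch (the helper prototype is inferred from the argument setup, not quoted):

    if (bUnmapInfo != 0) /* zero means no special unmapping is needed and the call is skipped */
        iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);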
    11019 
    11020 
    11021 /*********************************************************************************************************************************
    11022 *   State and Exceptions                                                                                                         *
    11023 *********************************************************************************************************************************/
    11024 
    11025 #define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE()     off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    11026 #define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ()       off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
    11027 
    11028 #define IEM_MC_PREPARE_SSE_USAGE()                  off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    11029 #define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE()     off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    11030 #define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ()       off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
    11031 
    11032 #define IEM_MC_PREPARE_AVX_USAGE()                  off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    11033 #define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE()     off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    11034 #define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ()       off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
    11035 
    11036 
    11037 DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
    11038 {
    11039     /** @todo this needs a lot more work later. */
    11040     RT_NOREF(pReNative, fForChange);
    11041     return off;
    11042 }
    1104363
    1104464
     
     1105171 * Built-in function that calls a C-implementation function taking zero arguments.
    1105272 */
    11053 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
     73IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
    1105474{
    1105575    PFNIEMCIMPL0 const pfnCImpl     = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
     
    1106888 * a non-zero status that stops TB execution.
    1106989 */
    11070 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
     90IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
    1107191{
    1107292    RT_NOREF(pCallEntry);
     
    11153173 * Built-in function checks if IEMCPU::fExec has the expected value.
    11154174 */
    11155 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
     175IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
    11156176{
    11157177    uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
     
    11852872 * raising a \#GP(0) if this isn't the case.
    11853873 */
    11854 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLim)
     874IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLim)
    11855875{
    11856876    uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
     
    11867887 * that may have modified them.
    11868888 */
    11869 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodes)
     889IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodes)
    11870890{
    11871891    PCIEMTB const  pTb      = pReNative->pTbOrg;
     
    11886906 * modified them.
    11887907 */
    11888 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodes)
     908IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodes)
    11889909{
    11890910    PCIEMTB const  pTb      = pReNative->pTbOrg;
     
    11904924 * checking after an instruction that may have modified them.
    11905925 */
    11906 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesConsiderCsLim)
     926IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesConsiderCsLim)
    11907927{
    11908928    PCIEMTB const  pTb      = pReNative->pTbOrg;
     
    11929949 * @see iemThreadedFunc_BltIn_CheckPcAndOpcodes
    11930950 */
    11931 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndPcAndOpcodes)
     951IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndPcAndOpcodes)
    11932952{
    11933953    PCIEMTB const  pTb      = pReNative->pTbOrg;
     
    11953973 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
    11954974 */
    11955 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodes)
     975IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodes)
    11956976{
    11957977    PCIEMTB const  pTb      = pReNative->pTbOrg;
     
    11977997 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
    11978998 */
    11979 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodesConsiderCsLim)
     999IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodesConsiderCsLim)
    119801000{
    119811001    PCIEMTB const  pTb      = pReNative->pTbOrg;
     
    120041024 * @see iemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
    120051025 */
    12006 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb)
     1026IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb)
    120071027{
    120081028    PCIEMTB const  pTb      = pReNative->pTbOrg;
     
    120311051 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
    120321052 */
    12033 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlb)
     1053IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlb)
    120341054{
    120351055    PCIEMTB const  pTb      = pReNative->pTbOrg;
     
    120571077 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
    120581078 */
    12059 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim)
     1079IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim)
    120601080{
    120611081    PCIEMTB const  pTb      = pReNative->pTbOrg;
     
    120901110 * @see iemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
    120911111 */
    12092 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb)
     1112IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb)
    120931113{
    120941114    PCIEMTB const  pTb         = pReNative->pTbOrg;
     
    121191139 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
    121201140 */
    12121 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb)
     1141IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb)
    121221142{
    121231143    PCIEMTB const  pTb         = pReNative->pTbOrg;
     
    121481168 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
    121491169 */
    12150 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim)
     1170IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim)
    121511171{
    121521172    PCIEMTB const  pTb         = pReNative->pTbOrg;
     
    121751195 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
    121761196 */
    12177 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb)
     1197IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb)
    121781198{
    121791199    PCIEMTB const  pTb         = pReNative->pTbOrg;
     
    122011221 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
    122021222 */
    12203 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb)
     1223IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb)
    122041224{
    122051225    PCIEMTB const  pTb         = pReNative->pTbOrg;
     
    122261246 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
    122271247 */
    12228 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim)
     1248IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim)
    122291249{
    122301250    PCIEMTB const  pTb         = pReNative->pTbOrg;
     
    122501270 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
    122511271 */
    12252 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb)
     1272IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb)
    122531273{
    122541274    PCIEMTB const  pTb         = pReNative->pTbOrg;
     
    122721292 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
    122731293 */
    12274 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb)
     1294IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb)
    122751295{
    122761296    PCIEMTB const  pTb         = pReNative->pTbOrg;
     
    122941314 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
    122951315 */
    12296 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim)
     1316IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim)
    122971317{
    122981318    PCIEMTB const  pTb         = pReNative->pTbOrg;
     
    123081328#endif
    123091329
    12310 
    12311 /*********************************************************************************************************************************
    12312 *   The native code generator functions for each MC block.                                                                       *
    12313 *********************************************************************************************************************************/
    12314 
    12315 
    12316 /*
    12317  * Include g_apfnIemNativeRecompileFunctions and associated functions.
    12318  *
     12319  * This should probably live in its own file later, but let's see what the
    12320  * compile times turn out to be first.
    12321  */
    12322 #include "IEMNativeFunctions.cpp.h"
    12323 
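For context, this generated table is what the translation loop indexes by the threaded call number when deciding whether a dedicated native emitter exists; roughly along these lines (a sketch, the fallback emitter name is an assumption and not part of this file):

    PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
    if (pfnRecom)
        off = pfnRecom(pReNative, off, pCallEntry);                  /* dedicated native emitter */
    else
        off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry); /* assumed fallback: call the threaded function */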
    12324 
    12325 
    12326 /*********************************************************************************************************************************
    12327 *   Recompiler Core.                                                                                                             *
    12328 *********************************************************************************************************************************/
    12329 
    12330 
    12331 /** @callback_method_impl{FNDISREADBYTES, Dummy.} */
    12332 static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
    12333 {
    12334     RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
    12335     pDis->cbCachedInstr += cbMaxRead;
    12336     RT_NOREF(cbMinRead);
    12337     return VERR_NO_DATA;
    12338 }
    12339 
    12340 
    12341 /**
    12342  * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
    12343  * @returns pszBuf.
    12344  * @param   fFlags  The flags.
    12345  * @param   pszBuf  The output buffer.
    12346  * @param   cbBuf   The output buffer size.  At least 32 bytes.
    12347  */
    12348 DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
    12349 {
    12350     Assert(cbBuf >= 32);
    12351     static RTSTRTUPLE const s_aModes[] =
    12352     {
    12353         /* [00] = */ { RT_STR_TUPLE("16BIT") },
    12354         /* [01] = */ { RT_STR_TUPLE("32BIT") },
    12355         /* [02] = */ { RT_STR_TUPLE("!2!") },
    12356         /* [03] = */ { RT_STR_TUPLE("!3!") },
    12357         /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
    12358         /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
    12359         /* [06] = */ { RT_STR_TUPLE("!6!") },
    12360         /* [07] = */ { RT_STR_TUPLE("!7!") },
    12361         /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
    12362         /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
    12363         /* [0a] = */ { RT_STR_TUPLE("64BIT") },
    12364         /* [0b] = */ { RT_STR_TUPLE("!b!") },
    12365         /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
    12366         /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
    12367         /* [0e] = */ { RT_STR_TUPLE("!e!") },
    12368         /* [0f] = */ { RT_STR_TUPLE("!f!") },
    12369         /* [10] = */ { RT_STR_TUPLE("!10!") },
    12370         /* [11] = */ { RT_STR_TUPLE("!11!") },
    12371         /* [12] = */ { RT_STR_TUPLE("!12!") },
    12372         /* [13] = */ { RT_STR_TUPLE("!13!") },
    12373         /* [14] = */ { RT_STR_TUPLE("!14!") },
    12374         /* [15] = */ { RT_STR_TUPLE("!15!") },
    12375         /* [16] = */ { RT_STR_TUPLE("!16!") },
    12376         /* [17] = */ { RT_STR_TUPLE("!17!") },
    12377         /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
    12378         /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
    12379         /* [1a] = */ { RT_STR_TUPLE("!1a!") },
    12380         /* [1b] = */ { RT_STR_TUPLE("!1b!") },
    12381         /* [1c] = */ { RT_STR_TUPLE("!1c!") },
    12382         /* [1d] = */ { RT_STR_TUPLE("!1d!") },
    12383         /* [1e] = */ { RT_STR_TUPLE("!1e!") },
    12384         /* [1f] = */ { RT_STR_TUPLE("!1f!") },
    12385     };
    12386     AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
    12387     memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
    12388     size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
    12389 
    12390     pszBuf[off++] = ' ';
    12391     pszBuf[off++] = 'C';
    12392     pszBuf[off++] = 'P';
    12393     pszBuf[off++] = 'L';
    12394     pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
    12395     Assert(off < 32);
    12396 
    12397     fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
    12398 
    12399     static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
    12400     {
    12401         { RT_STR_TUPLE("BYPASS_HANDLERS"),      IEM_F_BYPASS_HANDLERS    },
    12402         { RT_STR_TUPLE("PENDING_BRK_INSTR"),    IEM_F_PENDING_BRK_INSTR  },
    12403         { RT_STR_TUPLE("PENDING_BRK_DATA"),     IEM_F_PENDING_BRK_DATA   },
    12404         { RT_STR_TUPLE("PENDING_BRK_X86_IO"),   IEM_F_PENDING_BRK_X86_IO },
    12405         { RT_STR_TUPLE("X86_DISREGARD_LOCK"),   IEM_F_X86_DISREGARD_LOCK },
    12406         { RT_STR_TUPLE("X86_CTX_VMX"),          IEM_F_X86_CTX_VMX        },
    12407         { RT_STR_TUPLE("X86_CTX_SVM"),          IEM_F_X86_CTX_SVM        },
    12408         { RT_STR_TUPLE("X86_CTX_IN_GUEST"),     IEM_F_X86_CTX_IN_GUEST   },
    12409         { RT_STR_TUPLE("X86_CTX_SMM"),          IEM_F_X86_CTX_SMM        },
    12410         { RT_STR_TUPLE("INHIBIT_SHADOW"),       IEMTB_F_INHIBIT_SHADOW   },
    12411         { RT_STR_TUPLE("INHIBIT_NMI"),          IEMTB_F_INHIBIT_NMI      },
    12412         { RT_STR_TUPLE("CS_LIM_CHECKS"),        IEMTB_F_CS_LIM_CHECKS    },
    12413         { RT_STR_TUPLE("TYPE_THREADED"),        IEMTB_F_TYPE_THREADED    },
    12414         { RT_STR_TUPLE("TYPE_NATIVE"),          IEMTB_F_TYPE_NATIVE      },
    12415     };
    12416     if (fFlags)
    12417         for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
    12418             if (s_aFlags[i].fFlag & fFlags)
    12419             {
    12420                 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
    12421                 pszBuf[off++] = ' ';
    12422                 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
    12423                 off += s_aFlags[i].cchName;
    12424                 fFlags &= ~s_aFlags[i].fFlag;
    12425                 if (!fFlags)
    12426                     break;
    12427             }
    12428     pszBuf[off] = '\0';
    12429 
    12430     return pszBuf;
    12431 }
    12432 
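The formatter is meant to be fed a caller-provided scratch buffer, as done in iemNativeDisassembleTb below; a typical call and (made-up) result look like this:

    char szBuf[64];     /* must be at least 32 bytes */
    pHlp->pfnPrintf(pHlp, "fFlags=%#010x %s\n", pTb->fFlags,
                    iemTbFlagsToString(pTb->fFlags, szBuf, sizeof(szBuf)));
    /* Example output for an illustrative flag combination: "32BIT_PROT CPL3 CS_LIM_CHECKS TYPE_NATIVE" */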
    12433 
    12434 DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
    12435 {
    12436     AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
    12437 
    12438     char                    szDisBuf[512];
    12439     DISSTATE                Dis;
    12440     PCIEMNATIVEINSTR const  paNative      = pTb->Native.paInstructions;
    12441     uint32_t const          cNative       = pTb->Native.cInstructions;
    12442     uint32_t                offNative     = 0;
    12443 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    12444     PCIEMTBDBG const        pDbgInfo      = pTb->pDbgInfo;
    12445 #endif
    12446     DISCPUMODE              enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
    12447                                           : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
    12448                                           :                                                            DISCPUMODE_64BIT;
    12449 #if   defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
    12450     DISCPUMODE const        enmHstCpuMode = DISCPUMODE_64BIT;
    12451 #elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
    12452     DISCPUMODE const        enmHstCpuMode = DISCPUMODE_ARMV8_A64;
    12453 #elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
    12454 # error "Port me"
    12455 #else
    12456     csh                     hDisasm       = ~(size_t)0;
    12457 # if defined(RT_ARCH_AMD64)
    12458     cs_err                  rcCs          = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
    12459 # elif defined(RT_ARCH_ARM64)
    12460     cs_err                  rcCs          = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
    12461 # else
    12462 #  error "Port me"
    12463 # endif
    12464     AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
    12465 #endif
    12466 
    12467     /*
    12468      * Print TB info.
    12469      */
    12470     pHlp->pfnPrintf(pHlp,
    12471                     "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
    12472                     "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
    12473                     pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
    12474                     pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
    12475 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    12476     if (pDbgInfo && pDbgInfo->cEntries > 1)
    12477     {
    12478         Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
    12479 
    12480         /*
    12481          * This disassembly is driven by the debug info which follows the native
    12482          * code and indicates when it starts with the next guest instructions,
    12483          * where labels are and such things.
    12484          */
    12485         uint32_t                idxThreadedCall  = 0;
    12486         uint32_t                fExec            = pTb->fFlags & UINT32_C(0x00ffffff);
    12487         uint8_t                 idxRange         = UINT8_MAX;
    12488         uint8_t const           cRanges          = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
    12489         uint32_t                offRange         = 0;
    12490         uint32_t                offOpcodes       = 0;
    12491         uint32_t const          cbOpcodes        = pTb->cbOpcodes;
    12492         RTGCPHYS                GCPhysPc         = pTb->GCPhysPc;
    12493         uint32_t const          cDbgEntries      = pDbgInfo->cEntries;
    12494         uint32_t                iDbgEntry        = 1;
    12495         uint32_t                offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
    12496 
    12497         while (offNative < cNative)
    12498         {
    12499             /* If we're at or have passed the point where the next chunk of debug
    12500                info starts, process it. */
    12501             if (offDbgNativeNext <= offNative)
    12502             {
    12503                 offDbgNativeNext = UINT32_MAX;
    12504                 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
    12505                 {
    12506                     switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
    12507                     {
    12508                         case kIemTbDbgEntryType_GuestInstruction:
    12509                         {
    12510                             /* Did the exec flag change? */
    12511                             if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
    12512                             {
    12513                                 pHlp->pfnPrintf(pHlp,
    12514                                                 "  fExec change %#08x -> %#08x %s\n",
    12515                                                 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
    12516                                                 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
    12517                                                                    szDisBuf, sizeof(szDisBuf)));
    12518                                 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
    12519                                 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
    12520                                               : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
    12521                                               :                                                      DISCPUMODE_64BIT;
    12522                             }
    12523 
     12524                             /* New opcode range? We need to fend off a spurious debug info entry here for cases
    12525                                where the compilation was aborted before the opcode was recorded and the actual
    12526                                instruction was translated to a threaded call.  This may happen when we run out
    12527                                of ranges, or when some complicated interrupts/FFs are found to be pending or
    12528                                similar.  So, we just deal with it here rather than in the compiler code as it
    12529                                is a lot simpler to do here. */
    12530                             if (   idxRange == UINT8_MAX
    12531                                 || idxRange >= cRanges
    12532                                 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
    12533                             {
    12534                                 idxRange += 1;
    12535                                 if (idxRange < cRanges)
    12536                                     offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
    12537                                 else
    12538                                     continue;
    12539                                 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
    12540                                 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
    12541                                          + (pTb->aRanges[idxRange].idxPhysPage == 0
    12542                                             ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
    12543                                             : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
    12544                                 pHlp->pfnPrintf(pHlp, "  Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
    12545                                                 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
    12546                                                 pTb->aRanges[idxRange].idxPhysPage);
    12547                                 GCPhysPc += offRange;
    12548                             }
    12549 
    12550                             /* Disassemble the instruction. */
    12551                             //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
    12552                             uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offRange, 15);
    12553                             uint32_t      cbInstr    = 1;
    12554                             int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
    12555                                                                  &pTb->pabOpcodes[offOpcodes], cbInstrMax,
    12556                                                                  iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
    12557                             if (RT_SUCCESS(rc))
    12558                             {
    12559                                 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    12560                                                              DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    12561                                                              | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    12562                                                              NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    12563 
    12564                                 static unsigned const s_offMarker  = 55;
    12565                                 static char const     s_szMarker[] = " ; <--- guest";
    12566                                 if (cch < s_offMarker)
    12567                                 {
    12568                                     memset(&szDisBuf[cch], ' ', s_offMarker - cch);
    12569                                     cch = s_offMarker;
    12570                                 }
    12571                                 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
    12572                                     memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
    12573 
    12574                                 pHlp->pfnPrintf(pHlp, "  %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
    12575                             }
    12576                             else
    12577                             {
    12578                                 pHlp->pfnPrintf(pHlp, "  %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
    12579                                                 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
    12580                                 cbInstr = 1;
    12581                             }
    12582                             GCPhysPc   += cbInstr;
    12583                             offOpcodes += cbInstr;
    12584                             offRange   += cbInstr;
    12585                             continue;
    12586                         }
    12587 
    12588                         case kIemTbDbgEntryType_ThreadedCall:
    12589                             pHlp->pfnPrintf(pHlp,
    12590                                             "  Call #%u to %s (%u args) - %s\n",
    12591                                             idxThreadedCall,
    12592                                             g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
    12593                                             g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
    12594                                             pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
    12595                             idxThreadedCall++;
    12596                             continue;
    12597 
    12598                         case kIemTbDbgEntryType_GuestRegShadowing:
    12599                         {
    12600                             PCIEMTBDBGENTRY const pEntry    = &pDbgInfo->aEntries[iDbgEntry];
    12601                             const char * const    pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
    12602                             if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
    12603                                 pHlp->pfnPrintf(pHlp, "  Guest register %s != host register %s\n", pszGstReg,
    12604                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
    12605                             else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
    12606                                 pHlp->pfnPrintf(pHlp, "  Guest register %s == host register %s\n", pszGstReg,
    12607                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
    12608                             else
    12609                                 pHlp->pfnPrintf(pHlp, "  Guest register %s == host register %s (previously in %s)\n", pszGstReg,
    12610                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
    12611                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
    12612                             continue;
    12613                         }
    12614 
    12615                         case kIemTbDbgEntryType_Label:
    12616                         {
    12617                             const char *pszName    = "what_the_fudge";
    12618                             const char *pszComment = "";
    12619                             bool        fNumbered  = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
    12620                             switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
    12621                             {
    12622                                 case kIemNativeLabelType_Return:
    12623                                     pszName = "Return";
    12624                                     break;
    12625                                 case kIemNativeLabelType_ReturnBreak:
    12626                                     pszName = "ReturnBreak";
    12627                                     break;
    12628                                 case kIemNativeLabelType_ReturnWithFlags:
    12629                                     pszName = "ReturnWithFlags";
    12630                                     break;
    12631                                 case kIemNativeLabelType_NonZeroRetOrPassUp:
    12632                                     pszName = "NonZeroRetOrPassUp";
    12633                                     break;
    12634                                 case kIemNativeLabelType_RaiseGp0:
    12635                                     pszName = "RaiseGp0";
    12636                                     break;
    12637                                 case kIemNativeLabelType_ObsoleteTb:
    12638                                     pszName = "ObsoleteTb";
    12639                                     break;
    12640                                 case kIemNativeLabelType_NeedCsLimChecking:
    12641                                     pszName = "NeedCsLimChecking";
    12642                                     break;
    12643                                 case kIemNativeLabelType_If:
    12644                                     pszName = "If";
    12645                                     fNumbered = true;
    12646                                     break;
    12647                                 case kIemNativeLabelType_Else:
    12648                                     pszName = "Else";
    12649                                     fNumbered = true;
    12650                                     pszComment = "   ; regs state restored pre-if-block";
    12651                                     break;
    12652                                 case kIemNativeLabelType_Endif:
    12653                                     pszName = "Endif";
    12654                                     fNumbered = true;
    12655                                     break;
    12656                                 case kIemNativeLabelType_CheckIrq:
    12657                                     pszName = "CheckIrq_CheckVM";
    12658                                     fNumbered = true;
    12659                                     break;
    12660                                 case kIemNativeLabelType_TlbMiss:
    12661                                     pszName = "TlbMiss";
    12662                                     fNumbered = true;
    12663                                     break;
    12664                                 case kIemNativeLabelType_TlbDone:
    12665                                     pszName = "TlbDone";
    12666                                     fNumbered = true;
    12667                                     break;
    12668                                 case kIemNativeLabelType_Invalid:
    12669                                 case kIemNativeLabelType_End:
    12670                                     break;
    12671                             }
    12672                             if (fNumbered)
    12673                                 pHlp->pfnPrintf(pHlp, "  %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
    12674                             else
    12675                                 pHlp->pfnPrintf(pHlp, "  %s:\n", pszName);
    12676                             continue;
    12677                         }
    12678 
    12679                         case kIemTbDbgEntryType_NativeOffset:
    12680                             offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
    12681                             Assert(offDbgNativeNext > offNative);
    12682                             break;
    12683 
    12684                         default:
    12685                             AssertFailed();
    12686                     }
    12687                     iDbgEntry++;
    12688                     break;
    12689                 }
    12690             }
    12691 
    12692             /*
    12693              * Disassemble the next native instruction.
    12694              */
    12695             PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
    12696 # ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    12697             uint32_t               cbInstr    = sizeof(paNative[0]);
    12698             int const              rc         = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
    12699             if (RT_SUCCESS(rc))
    12700             {
    12701 #  if defined(RT_ARCH_AMD64)
    12702                 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
    12703                 {
    12704                     uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
    12705                     if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
    12706                         pHlp->pfnPrintf(pHlp, "    %p: nop ; marker: call #%u to %s (%u args) - %s\n",
    12707                                         pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
    12708                                         g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
    12709                                         uInfo & 0x8000 ? "recompiled" : "todo");
    12710                     else
    12711                         pHlp->pfnPrintf(pHlp, "    %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
    12712                 }
    12713                 else
    12714 #  endif
    12715                 {
    12716 #  ifdef RT_ARCH_AMD64
    12717                     DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    12718                                     DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    12719                                     | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    12720                                     NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    12721 #  elif defined(RT_ARCH_ARM64)
    12722                     DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
    12723                                      DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    12724                                      NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    12725 #  else
    12726 #   error "Port me"
    12727 #  endif
    12728                     pHlp->pfnPrintf(pHlp, "    %p: %s\n", pNativeCur, szDisBuf);
    12729                 }
    12730             }
    12731             else
    12732             {
    12733 #  if defined(RT_ARCH_AMD64)
    12734                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %Rrc\n",
    12735                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
    12736 #  elif defined(RT_ARCH_ARM64)
    12737                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
    12738 #  else
    12739 #   error "Port me"
    12740 #  endif
    12741                 cbInstr = sizeof(paNative[0]);
    12742             }
    12743             offNative += cbInstr / sizeof(paNative[0]);
    12744 
    12745 #  else  /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    12746             cs_insn *pInstr;
    12747             size_t   cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
    12748                                          (uintptr_t)pNativeCur, 1, &pInstr);
    12749             if (cInstrs > 0)
    12750             {
    12751                 Assert(cInstrs == 1);
    12752 #  if defined(RT_ARCH_AMD64)
    12753                 pHlp->pfnPrintf(pHlp, "    %p: %.*Rhxs %-7s %s\n",
    12754                                 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
    12755 #  else
    12756                 pHlp->pfnPrintf(pHlp, "    %p: %#010RX32 %-7s %s\n",
    12757                                 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
    12758 #  endif
    12759                 offNative += pInstr->size / sizeof(*pNativeCur);
    12760                 cs_free(pInstr, cInstrs);
    12761             }
    12762             else
    12763             {
    12764 #  if defined(RT_ARCH_AMD64)
    12765                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %d\n",
     12766                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
    12767 #  else
    12768                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
    12769 #  endif
    12770                 offNative++;
    12771             }
    12772 # endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    12773         }
    12774     }
    12775     else
    12776 #endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
    12777     {
    12778         /*
    12779          * No debug info, just disassemble the x86 code and then the native code.
    12780          *
    12781          * First the guest code:
    12782          */
    12783         for (unsigned i = 0; i < pTb->cRanges; i++)
    12784         {
    12785             RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
    12786                               + (pTb->aRanges[i].idxPhysPage == 0
    12787                                  ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
    12788                                  : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
    12789             pHlp->pfnPrintf(pHlp, "  Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
    12790                             i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
    12791             unsigned       off       = pTb->aRanges[i].offOpcodes;
    12792             /** @todo this ain't working when crossing pages!   */
    12793             unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
    12794             while (off < cbOpcodes)
    12795             {
    12796                 uint32_t cbInstr = 1;
    12797                 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
    12798                                                      &pTb->pabOpcodes[off], cbOpcodes - off,
    12799                                                      iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
    12800                 if (RT_SUCCESS(rc))
    12801                 {
    12802                     DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    12803                                     DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    12804                                     | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    12805                                     NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    12806                     pHlp->pfnPrintf(pHlp, "    %RGp: %s\n", GCPhysPc, szDisBuf);
    12807                     GCPhysPc += cbInstr;
    12808                     off      += cbInstr;
    12809                 }
    12810                 else
    12811                 {
    12812                     pHlp->pfnPrintf(pHlp, "    %RGp: %.*Rhxs - disassembly failure %Rrc\n",
    12813                                     GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
    12814                     break;
    12815                 }
    12816             }
    12817         }
    12818 
    12819         /*
    12820          * Then the native code:
    12821          */
    12822         pHlp->pfnPrintf(pHlp, "  Native code %p L %#x\n", paNative, cNative);
    12823         while (offNative < cNative)
    12824         {
    12825             PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
    12826 # ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    12827             uint32_t               cbInstr    = sizeof(paNative[0]);
    12828             int const              rc         = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
    12829             if (RT_SUCCESS(rc))
    12830             {
    12831 #  if defined(RT_ARCH_AMD64)
    12832                 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
    12833                 {
    12834                     uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
    12835                     if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
    12836                         pHlp->pfnPrintf(pHlp, "\n    %p: nop ; marker: call #%u to %s (%u args) - %s\n",
    12837                                         pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
    12838                                         g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
    12839                                         uInfo & 0x8000 ? "recompiled" : "todo");
    12840                     else
    12841                         pHlp->pfnPrintf(pHlp, "    %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
    12842                 }
    12843                 else
    12844 #  endif
    12845                 {
    12846 #  ifdef RT_ARCH_AMD64
    12847                     DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    12848                                     DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    12849                                     | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    12850                                     NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    12851 #  elif defined(RT_ARCH_ARM64)
    12852                     DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
    12853                                      DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    12854                                      NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    12855 #  else
    12856 #   error "Port me"
    12857 #  endif
    12858                     pHlp->pfnPrintf(pHlp, "    %p: %s\n", pNativeCur, szDisBuf);
    12859                 }
    12860             }
    12861             else
    12862             {
    12863 #  if defined(RT_ARCH_AMD64)
    12864                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %Rrc\n",
    12865                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
    12866 #  else
    12867                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
    12868 #  endif
    12869                 cbInstr = sizeof(paNative[0]);
    12870             }
    12871             offNative += cbInstr / sizeof(paNative[0]);
    12872 
    12873 # else  /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    12874             cs_insn *pInstr;
    12875             size_t   cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
    12876                                          (uintptr_t)pNativeCur, 1, &pInstr);
    12877             if (cInstrs > 0)
    12878             {
    12879                 Assert(cInstrs == 1);
    12880 #  if defined(RT_ARCH_AMD64)
    12881                 pHlp->pfnPrintf(pHlp, "    %p: %.*Rhxs %-7s %s\n",
    12882                                 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
    12883 #  else
    12884                 pHlp->pfnPrintf(pHlp, "    %p: %#010RX32 %-7s %s\n",
    12885                                 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
    12886 #  endif
    12887                 offNative += pInstr->size / sizeof(*pNativeCur);
    12888                 cs_free(pInstr, cInstrs);
    12889             }
    12890             else
    12891             {
    12892 #  if defined(RT_ARCH_AMD64)
    12893                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %d\n",
     12894                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
    12895 #  else
    12896                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
    12897 #  endif
    12898                 offNative++;
    12899             }
    12900 # endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    12901         }
    12902     }
    12903 
    12904 #ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    12905     /* Cleanup. */
    12906     cs_close(&hDisasm);
    12907 #endif
    12908 }
    12909 
    12910 
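For reference, a minimal standalone sketch of how the 32-bit info word read by the marker-NOP branches above (the *(uint32_t const *)&Dis.Instr.ab[3] loads) could be packed and unpacked, assuming the layout those branches imply: low 15 bits = call number, bit 15 = recompiled flag, high 16 bits = threaded function index. The helper names are hypothetical and not part of the VBox sources.

    #include <cstdint>
    #include <cstdio>

    /* Pack the marker info word: call index, recompiled flag and function enum value. */
    static uint32_t packMarkerInfo(uint16_t idxCall, bool fRecompiled, uint16_t enmFunction)
    {
        return ((uint32_t)enmFunction << 16) | (fRecompiled ? UINT32_C(0x8000) : 0) | (idxCall & UINT32_C(0x7fff));
    }

    /* Decode it the same way the disassembler loop above does. */
    static void printMarkerInfo(uint32_t uInfo)
    {
        printf("call #%u to function %u - %s\n",
               uInfo & 0x7fff, uInfo >> 16, uInfo & 0x8000 ? "recompiled" : "todo");
    }

    int main()
    {
        printMarkerInfo(packMarkerInfo(42, true, 7)); /* prints: call #42 to function 7 - recompiled */
        return 0;
    }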
    12911 /**
    12912  * Recompiles the given threaded TB into a native one.
    12913  *
    12914  * In case of failure the translation block will be returned as-is.
    12915  *
    12916  * @returns pTb.
    12917  * @param   pVCpu   The cross context virtual CPU structure of the calling
    12918  *                  thread.
    12919  * @param   pTb     The threaded translation to recompile to native.
    12920  */
    12921 DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
    12922 {
    12923     STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
    12924 
    12925     /*
     12926      * The first time through, we allocate the recompiler state; the other times
     12927      * we just need to reset it before using it again.
    12928      */
    12929     PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    12930     if (RT_LIKELY(pReNative))
    12931         iemNativeReInit(pReNative, pTb);
    12932     else
    12933     {
    12934         pReNative = iemNativeInit(pVCpu, pTb);
    12935         AssertReturn(pReNative, pTb);
    12936     }
    12937 
    12938     /*
    12939      * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
    12940      * for aborting if an error happens.
    12941      */
    12942     uint32_t        cCallsLeft = pTb->Thrd.cCalls;
    12943 #ifdef LOG_ENABLED
    12944     uint32_t const  cCallsOrg  = cCallsLeft;
    12945 #endif
    12946     uint32_t        off        = 0;
    12947     int             rc         = VINF_SUCCESS;
    12948     IEMNATIVE_TRY_SETJMP(pReNative, rc)
    12949     {
    12950         /*
    12951          * Emit prolog code (fixed).
    12952          */
    12953         off = iemNativeEmitProlog(pReNative, off);
    12954 
    12955         /*
    12956          * Convert the calls to native code.
    12957          */
    12958 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    12959         int32_t              iGstInstr        = -1;
    12960 #endif
    12961 #ifndef VBOX_WITHOUT_RELEASE_STATISTICS
    12962         uint32_t             cThreadedCalls   = 0;
    12963         uint32_t             cRecompiledCalls = 0;
    12964 #endif
    12965         PCIEMTHRDEDCALLENTRY pCallEntry       = pTb->Thrd.paCalls;
    12966         pReNative->fExec                      = pTb->fFlags & IEMTB_F_IEM_F_MASK;
    12967         while (cCallsLeft-- > 0)
    12968         {
    12969             PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
    12970 
    12971             /*
    12972              * Debug info and assembly markup.
    12973              */
    12974             if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
    12975                 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
    12976 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    12977             iemNativeDbgInfoAddNativeOffset(pReNative, off);
    12978             if (iGstInstr < (int32_t)pCallEntry->idxInstr)
    12979             {
    12980                 if (iGstInstr < (int32_t)pTb->cInstructions)
    12981                     iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
    12982                 else
    12983                     Assert(iGstInstr == pTb->cInstructions);
    12984                 iGstInstr = pCallEntry->idxInstr;
    12985             }
    12986             iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
    12987 #endif
    12988 #if defined(VBOX_STRICT)
    12989             off = iemNativeEmitMarker(pReNative, off,
    12990                                       RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
    12991                                                   pCallEntry->enmFunction));
    12992 #endif
    12993 #if defined(VBOX_STRICT)
    12994             iemNativeRegAssertSanity(pReNative);
    12995 #endif
    12996 
    12997             /*
    12998              * Actual work.
    12999              */
    13000             Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
    13001                   g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)"));
    13002             if (pfnRecom) /** @todo stats on this.   */
    13003             {
    13004                 off = pfnRecom(pReNative, off, pCallEntry);
    13005                 STAM_REL_STATS({cRecompiledCalls++;});
    13006             }
    13007             else
    13008             {
    13009                 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
    13010                 STAM_REL_STATS({cThreadedCalls++;});
    13011             }
    13012             Assert(off <= pReNative->cInstrBufAlloc);
    13013             Assert(pReNative->cCondDepth == 0);
    13014 
    13015             /*
    13016              * Advance.
    13017              */
    13018             pCallEntry++;
    13019         }
    13020 
    13021         STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
    13022         STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded,   cThreadedCalls);
    13023         if (!cThreadedCalls)
    13024             STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
    13025 
    13026         /*
    13027          * Emit the epilog code.
    13028          */
    13029         uint32_t idxReturnLabel;
    13030         off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
    13031 
    13032         /*
    13033          * Generate special jump labels.
    13034          */
    13035         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
    13036             off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
    13037         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
    13038             off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
    13039         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
    13040             off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
    13041         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
    13042             off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
    13043         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
    13044             off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
    13045     }
    13046     IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
    13047     {
    13048         Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
    13049         return pTb;
    13050     }
    13051     IEMNATIVE_CATCH_LONGJMP_END(pReNative);
    13052     Assert(off <= pReNative->cInstrBufAlloc);
    13053 
    13054     /*
     13055      * Make sure all labels have been defined.
    13056      */
    13057     PIEMNATIVELABEL const paLabels = pReNative->paLabels;
    13058 #ifdef VBOX_STRICT
    13059     uint32_t const        cLabels  = pReNative->cLabels;
    13060     for (uint32_t i = 0; i < cLabels; i++)
    13061         AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
    13062 #endif
    13063 
    13064     /*
    13065      * Allocate executable memory, copy over the code we've generated.
    13066      */
    13067     PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
    13068     if (pTbAllocator->pDelayedFreeHead)
    13069         iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
    13070 
    13071     PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
    13072     AssertReturn(paFinalInstrBuf, pTb);
    13073     memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
    13074 
    13075     /*
    13076      * Apply fixups.
    13077      */
    13078     PIEMNATIVEFIXUP const paFixups   = pReNative->paFixups;
    13079     uint32_t const        cFixups    = pReNative->cFixups;
    13080     for (uint32_t i = 0; i < cFixups; i++)
    13081     {
    13082         Assert(paFixups[i].off < off);
    13083         Assert(paFixups[i].idxLabel < cLabels);
    13084         AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
    13085                   ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
    13086                    paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
    13087         RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
    13088         switch (paFixups[i].enmType)
    13089         {
    13090 #if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    13091             case kIemNativeFixupType_Rel32:
    13092                 Assert(paFixups[i].off + 4 <= off);
    13093                 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    13094                 continue;
    13095 
    13096 #elif defined(RT_ARCH_ARM64)
    13097             case kIemNativeFixupType_RelImm26At0:
    13098             {
    13099                 Assert(paFixups[i].off < off);
    13100                 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    13101                 Assert(offDisp >= -262144 && offDisp < 262144);
    13102                 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
    13103                 continue;
    13104             }
    13105 
    13106             case kIemNativeFixupType_RelImm19At5:
    13107             {
    13108                 Assert(paFixups[i].off < off);
    13109                 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    13110                 Assert(offDisp >= -262144 && offDisp < 262144);
    13111                 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
    13112                 continue;
    13113             }
    13114 
    13115             case kIemNativeFixupType_RelImm14At5:
    13116             {
    13117                 Assert(paFixups[i].off < off);
    13118                 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    13119                 Assert(offDisp >= -8192 && offDisp < 8192);
    13120                 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
    13121                 continue;
    13122             }
    13123 
    13124 #endif
    13125             case kIemNativeFixupType_Invalid:
    13126             case kIemNativeFixupType_End:
    13127                 break;
    13128         }
    13129         AssertFailed();
    13130     }
    13131 
    13132     iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
    13133     STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
    13134 
    13135     /*
    13136      * Convert the translation block.
    13137      */
    13138     RTMemFree(pTb->Thrd.paCalls);
    13139     pTb->Native.paInstructions  = paFinalInstrBuf;
    13140     pTb->Native.cInstructions   = off;
    13141     pTb->fFlags                 = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
    13142 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
     13143     pTb->pDbgInfo               = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
    13144                                                       RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
    13145 #endif
    13146 
    13147     Assert(pTbAllocator->cThreadedTbs > 0);
    13148     pTbAllocator->cThreadedTbs -= 1;
    13149     pTbAllocator->cNativeTbs   += 1;
    13150     Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
    13151 
    13152 #ifdef LOG_ENABLED
    13153     /*
    13154      * Disassemble to the log if enabled.
    13155      */
    13156     if (LogIs3Enabled())
    13157     {
    13158         Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
    13159         iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
    13160 # ifdef DEBUG_bird
    13161         RTLogFlush(NULL);
    13162 # endif
    13163     }
    13164 #endif
    13165     /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
    13166 
    13167     STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
    13168     return pTb;
    13169 }
    13170 
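A minimal standalone sketch of the AMD64 Rel32 fixup case in the loop above: the 32-bit field at the fixup offset receives the signed distance from the fixup location to the label, plus the addend. The struct and function names below are simplified stand-ins, not the actual IEMNATIVEFIXUP/IEMNATIVELABEL types.

    #include <cstdint>
    #include <cstring>

    struct Label { uint32_t off; };                                    /* label position in the code buffer */
    struct Fixup { uint32_t off; uint32_t idxLabel; int8_t offAddend; };

    /* Patch a rel32 field in place; memcpy sidesteps unaligned-store concerns. */
    static void applyRel32Fixup(uint8_t *pbCode, Fixup const &fixup, Label const *paLabels)
    {
        int32_t const offDisp = (int32_t)paLabels[fixup.idxLabel].off - (int32_t)fixup.off + fixup.offAddend;
        memcpy(&pbCode[fixup.off], &offDisp, sizeof(offDisp));
    }

    int main()
    {
        uint8_t abCode[16] = {0};
        Label   aLabels[]  = { { 12 } };
        Fixup   fixup      = { 4, 0, -4 };  /* -4: the rel32 field sits 4 bytes before the next instruction */
        applyRel32Fixup(abCode, fixup, aLabels);
        return 0;
    }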
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp

    r102624 r102634  
    47034703 * Emits a call to a CImpl function or something similar.
    47044704 */
    4705 static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush,
    4706                                       uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
    4707                                      uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
     4705DECL_HIDDEN_THROW(uint32_t)
     4706iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
     4707                       uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
    47084708{
    47094709    /*
     
    1104111041    return off;
    1104211042}
    11043 
    11044 
    11045 
    11046 /*********************************************************************************************************************************
    11047 *   Builtin functions                                                                                                            *
    11048 *********************************************************************************************************************************/
    11049 
    11050 /**
     11051  * Built-in function that calls a C-implementation function taking zero arguments.
    11052  */
    11053 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
    11054 {
    11055     PFNIEMCIMPL0 const pfnCImpl     = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
    11056     uint8_t const      cbInstr      = (uint8_t)pCallEntry->auParams[1];
     11057     uint64_t const     fGstShwFlush = pCallEntry->auParams[2];
    11058     return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
    11059 }
    11060 
    11061 
    11062 /**
    11063  * Built-in function that checks for pending interrupts that can be delivered or
    11064  * forced action flags.
    11065  *
    11066  * This triggers after the completion of an instruction, so EIP is already at
    11067  * the next instruction.  If an IRQ or important FF is pending, this will return
    11068  * a non-zero status that stops TB execution.
    11069  */
    11070 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
    11071 {
    11072     RT_NOREF(pCallEntry);
    11073 
    11074     /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
    11075        and I'm too lazy to create a 'Fixed' version of that one. */
    11076     uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
    11077                                                           UINT32_MAX, pReNative->uCheckIrqSeqNo++);
    11078 
    11079     uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
    11080 
    11081     /* Again, we need to load the extended EFLAGS before we actually need them
    11082        in case we jump.  We couldn't use iemNativeRegAllocTmpForGuestReg if we
    11083        loaded them inside the check, as the shadow state would not be correct
    11084        when the code branches before the load.  Ditto PC. */
    11085     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    11086                                                               kIemNativeGstRegUse_ReadOnly);
    11087 
    11088     uint8_t const idxPcReg  = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
    11089 
    11090     uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    11091 
    11092     /*
    11093      * Start by checking the local forced actions of the EMT we're on for IRQs
     11094      * and other FFs that need servicing.
    11095      */
    11096     /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
     11097     /* Load FFs into idxTmpReg and AND with all relevant flags. */
    11098     off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
    11099     off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
    11100                                    VMCPU_FF_ALL_MASK & ~(  VMCPU_FF_PGM_SYNC_CR3
    11101                                                          | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
    11102                                                          | VMCPU_FF_TLB_FLUSH
    11103                                                          | VMCPU_FF_UNHALT ),
    11104                                    true /*fSetFlags*/);
     11105     /* If we end up with ZERO in idxTmpReg there is nothing to do. */
    11106     uint32_t const offFixupJumpToVmCheck1 = off;
    11107     off = iemNativeEmitJzToFixed(pReNative, off, 0);
    11108 
     11109     /* Some relevant FFs are set, but if it's only APIC and/or PIC being set,
     11110        these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
    11111     off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
    11112                                    ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
    11113     /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
    11114     off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
    11115 
     11116     /* So, it's only interrupt related FFs and we need to see if IRQs are being
    11117        suppressed by the CPU or not. */
    11118     off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
    11119     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
    11120                                                               idxLabelReturnBreak);
    11121 
    11122     /* We've got shadow flags set, so we must check that the PC they are valid
    11123        for matches our current PC value. */
    11124     /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
    11125      *        a register. */
    11126     off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
    11127     off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
    11128 
    11129     /*
    11130      * Now check the force flags of the VM.
    11131      */
    11132     iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
    11133     iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
    11134     off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
    11135     off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
    11136     off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
    11137     off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
    11138 
    11139     /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
    11140 
    11141     /*
    11142      * We're good, no IRQs or FFs pending.
    11143      */
    11144     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    11145     iemNativeRegFreeTmp(pReNative, idxEflReg);
    11146     iemNativeRegFreeTmp(pReNative, idxPcReg);
    11147 
    11148     return off;
    11149 }
    11150 
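A plain-C sketch (not the emitted code) of the condition the CheckIrq built-in above recompiles: break out of the TB when an IRQ or other force flag needs servicing, otherwise continue. The field and mask names are simplified stand-ins for the VMCPU/VM members and VMCPU_FF_*/VM_FF_* constants used above.

    #include <cstdint>

    struct MiniCpuState
    {
        uint64_t fLocalForcedActions;   /* per-EMT force flags */
        uint64_t fGlobalForcedActions;  /* VM-wide force flags (really lives in the VM structure) */
        uint64_t fEflExt;               /* extended EFLAGS, incl. the interrupt-shadow bits */
        uint64_t uRipInhibitInt;        /* RIP the interrupt shadow was recorded for */
        uint64_t uRip;                  /* current guest RIP */
    };

    static bool checkIrqWouldBreak(MiniCpuState const *pCpu,
                                   uint64_t fRelevantMask /* ~ VMCPU_FF_ALL_MASK minus the ignorable bits */,
                                   uint64_t fIrqOnlyMask  /* APIC | PIC */,
                                   uint64_t fIf           /* X86_EFL_IF */,
                                   uint64_t fShadowMask   /* CPUMCTX_INHIBIT_SHADOW */,
                                   uint64_t fVmMask       /* VM_FF_ALL_MASK */)
    {
        uint64_t const fFlags = pCpu->fLocalForcedActions & fRelevantMask;
        if (fFlags)
        {
            if (fFlags & ~fIrqOnlyMask)
                return true;                                /* something besides APIC/PIC -> ReturnBreak */
            if (pCpu->fEflExt & fIf)                        /* IRQs enabled? */
            {
                if (!(pCpu->fEflExt & fShadowMask))
                    return true;                            /* no interrupt shadow -> deliverable */
                if (pCpu->uRipInhibitInt != pCpu->uRip)
                    return true;                            /* shadow recorded for another RIP -> deliverable */
            }
        }
        return (pCpu->fGlobalForcedActions & fVmMask) != 0; /* finally the VM-wide force flags */
    }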
    11151 
    11152 /**
     11153  * Built-in function that checks if IEMCPU::fExec has the expected value.
    11154  */
    11155 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
    11156 {
    11157     uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
    11158     uint8_t const  idxTmpReg     = iemNativeRegAllocTmp(pReNative, &off);
    11159 
    11160     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
    11161     off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
    11162     off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
    11163                                                               kIemNativeLabelType_ReturnBreak);
    11164     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    11165     return off;
    11166 }
    11167 
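The equivalent condition in plain C, as a small sketch (not the emitted code); fKeyMask stands in for IEMTB_F_KEY_MASK.

    #include <cstdint>

    /* Returns true when the execution mode no longer matches and the TB must be left (ReturnBreak). */
    static bool checkModeWouldBreak(uint32_t fExec, uint32_t fExpectedExec, uint32_t fKeyMask)
    {
        return (fExec & fKeyMask) != (fExpectedExec & fKeyMask);
    }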
    11168 
    11169 /**
    11170  * Sets idxTbCurInstr in preparation of raising an exception.
    11171  */
    11172 /** @todo Optimize this, so we don't set the same value more than once.  Just
    11173  *        needs some tracking. */
    11174 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    11175 # define BODY_SET_CUR_INSTR() \
    11176     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr))
    11177 #else
    11178 # define BODY_SET_CUR_INSTR() ((void)0)
    11179 #endif
    11180 
    11181 
    11182 /**
    11183  * Macro that emits the 16/32-bit CS.LIM check.
    11184  */
    11185 #define BODY_CHECK_CS_LIM(a_cbInstr) \
    11186     off = iemNativeEmitBltInCheckCsLim(pReNative, off, (a_cbInstr))
    11187 
    11188 DECL_FORCE_INLINE(uint32_t)
    11189 iemNativeEmitBltInCheckCsLim(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
    11190 {
    11191     Assert(cbInstr >  0);
    11192     Assert(cbInstr < 16);
    11193 
    11194     /*
    11195      * We need CS.LIM and RIP here. When cbInstr is larger than 1, we also need
    11196      * a temporary register for calculating the last address of the instruction.
    11197      *
    11198      * The calculation and comparisons are 32-bit.  We ASSUME that the incoming
    11199      * RIP isn't totally invalid, i.e. that any jump/call/ret/iret instruction
    11200      * that last updated EIP here checked it already, and that we're therefore
    11201      * safe in the 32-bit wrap-around scenario to only check that the last byte
    11202      * is within CS.LIM.  In the case of instruction-by-instruction advancing
     11203      * up to an EIP wrap-around, we know that CS.LIM is 4G-1 because the limit
    11204      * must be using 4KB granularity and the previous instruction was fine.
    11205      */
    11206     uint8_t const  idxRegPc     = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
    11207                                                                   kIemNativeGstRegUse_ReadOnly);
    11208     uint8_t const  idxRegCsLim  = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
    11209                                                                   kIemNativeGstRegUse_ReadOnly);
    11210 #ifdef RT_ARCH_AMD64
    11211     uint8_t * const pbCodeBuf   = iemNativeInstrBufEnsure(pReNative, off, 8);
    11212 #elif defined(RT_ARCH_ARM64)
    11213     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    11214 #else
    11215 # error "Port me"
    11216 #endif
    11217 
    11218     if (cbInstr != 1)
    11219     {
    11220         uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
    11221 
    11222         /*
    11223          * 1. idxRegTmp = idxRegPc + cbInstr;
    11224          * 2. if idxRegTmp > idxRegCsLim then raise #GP(0).
    11225          */
    11226 #ifdef RT_ARCH_AMD64
    11227         /* 1. lea tmp32, [Pc + cbInstr - 1] */
    11228         if (idxRegTmp >= 8 || idxRegPc >= 8)
    11229             pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegPc < 8 ? 0 : X86_OP_REX_B);
    11230         pbCodeBuf[off++] = 0x8d;
    11231         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, idxRegTmp & 7, idxRegPc & 7);
    11232         if ((idxRegPc & 7) == X86_GREG_xSP)
    11233             pbCodeBuf[off++] = X86_SIB_MAKE(idxRegPc & 7, 4 /*no index*/, 0);
    11234         pbCodeBuf[off++] = cbInstr - 1;
    11235 
    11236         /* 2. cmp tmp32(r), CsLim(r/m). */
    11237         if (idxRegTmp >= 8 || idxRegCsLim >= 8)
    11238             pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
    11239         pbCodeBuf[off++] = 0x3b;
    11240         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegTmp & 7, idxRegCsLim & 7);
    11241 
    11242 #elif defined(RT_ARCH_ARM64)
    11243         /* 1. add tmp32, Pc, #cbInstr-1 */
    11244         pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegTmp, idxRegPc, cbInstr - 1, false /*f64Bit*/);
    11245         /* 2. cmp tmp32, CsLim */
    11246         pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegTmp, idxRegCsLim,
    11247                                                       false /*f64Bit*/, true /*fSetFlags*/);
    11248 
    11249 #endif
    11250         iemNativeRegFreeTmp(pReNative, idxRegTmp);
    11251     }
    11252     else
    11253     {
    11254         /*
    11255          * Here we can skip step 1 and compare PC and CS.LIM directly.
    11256          */
    11257 #ifdef RT_ARCH_AMD64
    11258         /* 2. cmp eip(r), CsLim(r/m). */
    11259         if (idxRegPc >= 8 || idxRegCsLim >= 8)
    11260             pbCodeBuf[off++] = (idxRegPc < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
    11261         pbCodeBuf[off++] = 0x3b;
    11262         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegPc & 7, idxRegCsLim & 7);
    11263 
    11264 #elif defined(RT_ARCH_ARM64)
    11265         /* 2. cmp Pc, CsLim */
    11266         pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegPc, idxRegCsLim,
    11267                                                       false /*f64Bit*/, true /*fSetFlags*/);
    11268 
    11269 #endif
    11270     }
    11271 
    11272     /* 3. Jump if greater. */
    11273     off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    11274 
    11275     iemNativeRegFreeTmp(pReNative, idxRegCsLim);
    11276     iemNativeRegFreeTmp(pReNative, idxRegPc);
    11277     return off;
    11278 }
    11279 
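A plain-C sketch (not the emitted code) of the guest-level condition the CS.LIM check above implements: the last byte of the instruction must lie within the CS limit, otherwise #GP(0). The 32-bit arithmetic deliberately mirrors the wrap-around assumption described in the comment above.

    #include <cstdint>

    static bool csLimCheckWouldRaiseGp0(uint32_t uEip, uint8_t cbInstr, uint32_t uCsLimit)
    {
        uint32_t const uLastByte = uEip + cbInstr - 1;  /* 32-bit wrap-around is intentional */
        return uLastByte > uCsLimit;                    /* true -> jump to the RaiseGp0 label */
    }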
    11280 
    11281 /**
    11282  * Macro that considers whether we need CS.LIM checking after a branch or
    11283  * crossing over to a new page.
    11284  */
    11285 #define BODY_CONSIDER_CS_LIM_CHECKING(a_pTb, a_cbInstr) \
    11286     RT_NOREF(cbInstr); \
    11287     off = iemNativeEmitBltInConsiderLimChecking(pReNative, off)
    11288 
    11289 DECL_FORCE_INLINE(uint32_t)
    11290 iemNativeEmitBltInConsiderLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    11291 {
    11292     /*
     11293      * This check must match the one in iemGetTbFlagsForCurrentPc
    11294      * exactly:
    11295      *
    11296      *  int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
    11297      *  if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
    11298      *      return fRet;
    11299      *  return fRet | IEMTB_F_CS_LIM_CHECKS;
    11300      *
    11301      *
    11302      * We need EIP, CS.LIM and CS.BASE here.
    11303      */
    11304 
    11305     /* Calculate the offFromLim first: */
    11306     uint8_t const  idxRegPc     = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
    11307                                                                   kIemNativeGstRegUse_ReadOnly);
    11308     uint8_t const  idxRegCsLim  = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
    11309                                                                   kIemNativeGstRegUse_ReadOnly);
    11310     uint8_t const  idxRegLeft   = iemNativeRegAllocTmp(pReNative, &off);
    11311 
    11312 #ifdef RT_ARCH_ARM64
    11313     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11314     pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegLeft, idxRegCsLim, idxRegPc);
    11315     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11316 #else
    11317     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegLeft, idxRegCsLim);
    11318     off = iemNativeEmitSubTwoGprs(pReNative, off, idxRegLeft, idxRegPc);
    11319 #endif
    11320 
    11321     iemNativeRegFreeTmp(pReNative, idxRegCsLim);
    11322     iemNativeRegFreeTmp(pReNative, idxRegPc);
    11323 
    11324     /* Calculate the threshold level (right side). */
    11325     uint8_t const  idxRegCsBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
    11326                                                                   kIemNativeGstRegUse_ReadOnly);
    11327     uint8_t const  idxRegRight  = iemNativeRegAllocTmp(pReNative, &off);
    11328 
    11329 #ifdef RT_ARCH_ARM64
    11330     pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
    11331     Assert(Armv8A64ConvertImmRImmS2Mask32(11, 0) == GUEST_PAGE_OFFSET_MASK);
    11332     pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegRight, idxRegCsBase, 11, 0, false /*f64Bit*/);
    11333     pu32CodeBuf[off++] = Armv8A64MkInstrNeg(idxRegRight);
    11334     pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
    11335     pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
    11336     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11337 
    11338 #else
    11339     off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegRight, GUEST_PAGE_OFFSET_MASK);
    11340     off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegRight, idxRegCsBase);
    11341     off = iemNativeEmitNegGpr(pReNative, off, idxRegRight);
    11342     off = iemNativeEmitAddGprImm(pReNative, off, idxRegRight, X86_PAGE_SIZE + 16);
    11343 #endif
    11344 
    11345     iemNativeRegFreeTmp(pReNative, idxRegCsBase);
    11346 
    11347     /* Compare the two and jump out if we're too close to the limit. */
    11348     off = iemNativeEmitCmpGprWithGpr(pReNative, off, idxRegLeft, idxRegRight);
    11349     off = iemNativeEmitJlToNewLabel(pReNative, off, kIemNativeLabelType_NeedCsLimChecking);
    11350 
    11351     iemNativeRegFreeTmp(pReNative, idxRegRight);
    11352     iemNativeRegFreeTmp(pReNative, idxRegLeft);
    11353     return off;
    11354 }
    11355 
    11356 
    11357 
    11358 /**
    11359  * Macro that implements opcode (re-)checking.
    11360  */
    11361 #define BODY_CHECK_OPCODES(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
    11362     RT_NOREF(cbInstr); \
    11363     off = iemNativeEmitBltInCheckOpcodes(pReNative, off, (a_pTb), (a_idxRange), (a_offRange))
    11364 
    11365 DECL_FORCE_INLINE(uint32_t)
    11366 iemNativeEmitBltInCheckOpcodes(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange, uint16_t offRange)
    11367 {
    11368     Assert(idxRange < pTb->cRanges && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges));
    11369     Assert(offRange < pTb->aRanges[idxRange].cbOpcodes);
    11370 
    11371     uint32_t const      idxLabelObsoleteTb = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ObsoleteTb);
    11372 
    11373     /*
    11374      * Where to start and how much to compare.
    11375      *
    11376      * Looking at the ranges produced when r160746 was running a DOS VM with TB
    11377      * logging, the ranges can be anything from 1 byte to at least 0x197 bytes,
    11378      * with the 6, 5, 4, 7, 8, 40, 3, 2, 9 and 10 being the top 10 in the sample.
    11379      *
    11380      * The top 10 for the early boot phase of a 64-bit debian 9.4 VM: 5, 9, 8,
    11381      * 12, 10, 11, 6, 13, 15 and 16.  Max 0x359 bytes. Same revision as above.
    11382      */
    11383     uint16_t            offPage     = pTb->aRanges[idxRange].offPhysPage + offRange;
    11384     uint16_t            cbLeft      = pTb->aRanges[idxRange].cbOpcodes   - offRange;
    11385     Assert(cbLeft > 0);
    11386     uint8_t const      *pbOpcodes   = &pTb->pabOpcodes[pTb->aRanges[idxRange].offOpcodes];
    11387     uint32_t            offConsolidatedJump = UINT32_MAX;
    11388 
    11389 #ifdef RT_ARCH_AMD64
     11390     /* AMD64/x86 offers a bunch of options.  Smaller stuff can be
     11391        completely inlined; for larger we use REPE CMPS.  */
    11392 # define CHECK_OPCODES_CMP_IMMXX(a_idxReg, a_bOpcode) /* cost: 3 bytes */  do { \
    11393             pbCodeBuf[off++] = a_bOpcode; \
    11394             Assert(offPage < 127); \
    11395             pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 7, a_idxReg); \
    11396             pbCodeBuf[off++] = RT_BYTE1(offPage); \
    11397         } while (0)
    11398 
    11399 # define CHECK_OPCODES_CMP_JMP() /* cost: 7 bytes first time, then 2 bytes */ do { \
    11400             if (offConsolidatedJump != UINT32_MAX) \
    11401             { \
    11402                 int32_t const offDisp = (int32_t)offConsolidatedJump - (int32_t)(off + 2); \
    11403                 Assert(offDisp >= -128); \
    11404                 pbCodeBuf[off++] = 0x75; /* jnz near */ \
    11405                 pbCodeBuf[off++] = (uint8_t)offDisp; \
    11406             } \
    11407             else \
    11408             { \
    11409                 pbCodeBuf[off++] = 0x74; /* jz near +5 */ \
    11410                 pbCodeBuf[off++] = 0x05; \
    11411                 offConsolidatedJump = off; \
    11412                 pbCodeBuf[off++] = 0xe9; /* jmp rel32 */ \
    11413                 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_Rel32, -4); \
    11414                 pbCodeBuf[off++] = 0x00; \
    11415                 pbCodeBuf[off++] = 0x00; \
    11416                 pbCodeBuf[off++] = 0x00; \
    11417                 pbCodeBuf[off++] = 0x00; \
    11418             } \
    11419         } while (0)
    11420 
    11421 # define CHECK_OPCODES_CMP_IMM32(a_idxReg) /* cost: 3+4+2 = 9 */ do { \
    11422         CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
    11423         pbCodeBuf[off++] = *pbOpcodes++; \
    11424         pbCodeBuf[off++] = *pbOpcodes++; \
    11425         pbCodeBuf[off++] = *pbOpcodes++; \
    11426         pbCodeBuf[off++] = *pbOpcodes++; \
    11427         cbLeft  -= 4; \
    11428         offPage += 4; \
    11429         CHECK_OPCODES_CMP_JMP(); \
    11430     } while (0)
    11431 
    11432 # define CHECK_OPCODES_CMP_IMM16(a_idxReg) /* cost: 1+3+2+2 = 8 */ do { \
    11433         pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP; \
    11434         CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
    11435         pbCodeBuf[off++] = *pbOpcodes++; \
    11436         pbCodeBuf[off++] = *pbOpcodes++; \
    11437         cbLeft  -= 2; \
    11438         offPage += 2; \
    11439         CHECK_OPCODES_CMP_JMP(); \
    11440     } while (0)
    11441 
    11442 # define CHECK_OPCODES_CMP_IMM8(a_idxReg) /* cost: 3+1+2 = 6 */ do { \
    11443         CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x80); \
    11444         pbCodeBuf[off++] = *pbOpcodes++; \
    11445         cbLeft  -= 1; \
    11446         offPage += 1; \
    11447         CHECK_OPCODES_CMP_JMP(); \
    11448     } while (0)
    11449 
    11450 # define CHECK_OPCODES_CMPSX(a_bOpcode, a_cbToSubtract, a_bPrefix) /* cost: 2+2 = 4 */ do { \
    11451         if (a_bPrefix) \
    11452             pbCodeBuf[off++] = (a_bPrefix); \
    11453         pbCodeBuf[off++] = (a_bOpcode); \
    11454         CHECK_OPCODES_CMP_JMP(); \
    11455         cbLeft -= (a_cbToSubtract); \
    11456     } while (0)
    11457 
    11458 # define CHECK_OPCODES_ECX_IMM(a_uValue) /* cost: 5 */ do { \
    11459         pbCodeBuf[off++] = 0xb8 + X86_GREG_xCX; \
    11460         pbCodeBuf[off++] = RT_BYTE1(a_uValue); \
    11461         pbCodeBuf[off++] = RT_BYTE2(a_uValue); \
    11462         pbCodeBuf[off++] = RT_BYTE3(a_uValue); \
    11463         pbCodeBuf[off++] = RT_BYTE4(a_uValue); \
    11464     } while (0)
    11465 
    11466     if (cbLeft <= 24)
    11467     {
    11468         uint8_t const idxRegTmp = iemNativeRegAllocTmpEx(pReNative, &off,
    11469                                                            (  RT_BIT_32(X86_GREG_xAX)
    11470                                                             | RT_BIT_32(X86_GREG_xCX)
    11471                                                             | RT_BIT_32(X86_GREG_xDX)
    11472                                                             | RT_BIT_32(X86_GREG_xBX)
    11473                                                             | RT_BIT_32(X86_GREG_xSI)
    11474                                                             | RT_BIT_32(X86_GREG_xDI))
    11475                                                          & ~IEMNATIVE_REG_FIXED_MASK); /* pick reg not requiring rex prefix */
    11476         off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.pbInstrBuf));
    11477         if (offPage >= 128 - cbLeft)
    11478         {
    11479             off = iemNativeEmitAddGprImm(pReNative, off, idxRegTmp, offPage & ~(uint16_t)3);
    11480             offPage &= 3;
    11481         }
    11482 
    11483         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 14 + 54 + 8 + 6 /* = 87 */);
    11484 
    11485         if (cbLeft > 8)
    11486             switch (offPage & 3)
    11487             {
    11488                 case 0:
    11489                     break;
    11490                 case 1: /* cost: 6 + 8 = 14 */
    11491                     CHECK_OPCODES_CMP_IMM8(idxRegTmp);
    11492                     RT_FALL_THRU();
    11493                 case 2: /* cost: 8 */
    11494                     CHECK_OPCODES_CMP_IMM16(idxRegTmp);
    11495                     break;
    11496                 case 3: /* cost: 6 */
    11497                     CHECK_OPCODES_CMP_IMM8(idxRegTmp);
    11498                     break;
    11499             }
    11500 
    11501         while (cbLeft >= 4)
    11502             CHECK_OPCODES_CMP_IMM32(idxRegTmp);     /* max iteration: 24/4 = 6; --> cost: 6 * 9 = 54 */
    11503 
    11504         if (cbLeft >= 2)
    11505             CHECK_OPCODES_CMP_IMM16(idxRegTmp);     /* cost: 8 */
    11506         if (cbLeft)
    11507             CHECK_OPCODES_CMP_IMM8(idxRegTmp);      /* cost: 6 */
    11508 
    11509         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11510         iemNativeRegFreeTmp(pReNative, idxRegTmp);
    11511     }
    11512     else
    11513     {
    11514         /* RDI = &pbInstrBuf[offPage] */
    11515         uint8_t const idxRegDi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xDI));
    11516         off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegDi, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
    11517         if (offPage != 0)
    11518             off = iemNativeEmitAddGprImm(pReNative, off, idxRegDi, offPage);
    11519 
    11520         /* RSI = pbOpcodes */
    11521         uint8_t const idxRegSi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xSI));
    11522         off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegSi, (uintptr_t)pbOpcodes);
    11523 
    11524         /* RCX = counts. */
    11525         uint8_t const idxRegCx = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xCX));
    11526 
    11527         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5 + 10 + 5 + 5 + 3 + 4 + 3 /*= 35*/);
    11528 
    11529         /** @todo profile and optimize this further.  Maybe an idea to align by
    11530          *        offPage if the two cannot be reconciled. */
    11531         /* Align by the page offset, so that at least one of the accesses is naturally aligned. */
    11532         switch (offPage & 7)                                            /* max cost: 10 */
    11533         {
    11534             case 0:
    11535                 break;
    11536             case 1: /* cost: 3+4+3 = 10 */
    11537                 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
    11538                 RT_FALL_THRU();
    11539             case 2: /* cost: 4+3 = 7 */
    11540                 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
    11541                 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
    11542                 break;
    11543             case 3: /* cost: 3+3 = 6 */
    11544                 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
    11545                 RT_FALL_THRU();
    11546             case 4: /* cost: 3 */
    11547                 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
    11548                 break;
    11549             case 5: /* cost: 3+4 = 7 */
    11550                 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
    11551                 RT_FALL_THRU();
    11552             case 6: /* cost: 4 */
    11553                 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
    11554                 break;
    11555             case 7: /* cost: 3 */
    11556                 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
    11557                 break;
    11558         }
    11559 
    11560         /* Compare qwords: */
    11561         uint32_t const cQWords = cbLeft >> 3;
    11562         CHECK_OPCODES_ECX_IMM(cQWords);                                     /* cost: 5 */
    11563 
    11564         pbCodeBuf[off++] = X86_OP_PRF_REPZ;                                 /* cost: 5 */
    11565         CHECK_OPCODES_CMPSX(0xa7, 0, X86_OP_REX_W);
    11566         cbLeft &= 7;
    11567 
    11568         if (cbLeft & 4)
    11569             CHECK_OPCODES_CMPSX(0xa7, 0, 0);                                /* cost: 3 */
    11570         if (cbLeft & 2)
    11571             CHECK_OPCODES_CMPSX(0xa7, 0, X86_OP_PRF_SIZE_OP);               /* cost: 4 */
    11572         if (cbLeft & 1)
    11573             CHECK_OPCODES_CMPSX(0xa6, 0, 0);                                /* cost: 3 */
    11574 
    11575         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11576         iemNativeRegFreeTmp(pReNative, idxRegCx);
    11577         iemNativeRegFreeTmp(pReNative, idxRegSi);
    11578         iemNativeRegFreeTmp(pReNative, idxRegDi);
    11579     }
    11580 
    11581 #elif defined(RT_ARCH_ARM64)
    11582     /* We need pbInstrBuf in a register, whatever we do. */
    11583     uint8_t const idxRegSrc1Ptr = iemNativeRegAllocTmp(pReNative, &off);
    11584     off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegSrc1Ptr, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
    11585 
    11586     /* We also need at least one more register for holding bytes & words we
    11587        load via pbInstrBuf. */
    11588     uint8_t const idxRegSrc1Val = iemNativeRegAllocTmp(pReNative, &off);
    11589 
    11590     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
    11591 
    11592     /* One byte compare can be done with the opcode byte as an immediate. We'll
    11593        do this to uint16_t align src1. */
    11594     bool fPendingJmp = RT_BOOL(offPage & 1);
    11595     if (fPendingJmp)
    11596     {
    11597         pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
    11598         pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, *pbOpcodes++, false /*f64Bit*/);
    11599         offPage += 1;
    11600         cbLeft  -= 1;
    11601     }
    11602 
    11603     if (cbLeft > 0)
    11604     {
    11605         /* We need a register for holding the opcode bytes we're comparing with,
    11606            as CCMP only has a 5-bit immediate form and thus cannot hold bytes. */
    11607         uint8_t const idxRegSrc2Val = iemNativeRegAllocTmp(pReNative, &off);
    11608 
    11609         /* Word (uint32_t) aligning the src1 pointer is best done using a 16-bit constant load. */
    11610         if ((offPage & 3) && cbLeft >= 2)
    11611         {
    11612             pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 2);
    11613             pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
    11614             if (fPendingJmp)
    11615                 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
    11616                                                             ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
    11617             else
    11618             {
    11619                 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
    11620                 fPendingJmp = true;
    11621             }
    11622             pbOpcodes += 2;
    11623             offPage   += 2;
    11624             cbLeft    -= 2;
    11625         }
    11626 
    11627         /* DWord (uint64_t) aligning the src1 pointer.  We use a 32-bit constant here for simplicity. */
    11628         if ((offPage & 7) && cbLeft >= 4)
    11629         {
    11630             pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 4);
    11631             off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
    11632                                               RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
    11633             if (fPendingJmp)
    11634                 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
    11635                                                             ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
    11636             else
    11637             {
    11638                 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
    11639                 fPendingJmp = true;
    11640             }
    11641             pbOpcodes += 4;
    11642             offPage   += 4;
    11643             cbLeft    -= 4;
    11644         }
    11645 
    11646         /*
    11647          * If we've got 16 bytes or more left, switch to memcmp-style.
    11648          */
    11649         if (cbLeft >= 16)
    11650         {
    11651             /* We need a pointer to the copy of the original opcode bytes. */
    11652             uint8_t const idxRegSrc2Ptr = iemNativeRegAllocTmp(pReNative, &off);
    11653             off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Ptr, (uintptr_t)pbOpcodes);
    11654 
    11655             /* If there are 64 bytes or more to compare we create a loop processing
    11656                32 bytes per iteration, for which we'll need a loop register. */
    11657             if (cbLeft >= 64)
    11658             {
    11659                 if (fPendingJmp)
    11660                 {
    11661                     iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
    11662                     pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
    11663                     fPendingJmp = false;
    11664                 }
    11665 
    11666                 uint8_t const  idxRegLoop = iemNativeRegAllocTmp(pReNative, &off);
    11667                 uint16_t const cLoops     = cbLeft / 32;
    11668                 cbLeft                    = cbLeft % 32;
    11669                 pbOpcodes                += cLoops * 32;
    11670                 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegLoop, cLoops);
    11671 
    11672                 if (offPage != 0) /** @todo optimize out this instruction. */
    11673                 {
    11674                     pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, offPage);
    11675                     offPage = 0;
    11676                 }
    11677 
    11678                 uint32_t const offLoopStart = off;
    11679                 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 0);
    11680                 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 0);
    11681                 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
    11682 
    11683                 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 1);
    11684                 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 1);
    11685                 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
    11686                                                             ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
    11687 
    11688                 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 2);
    11689                 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 2);
    11690                 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
    11691                                                             ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
    11692 
    11693                 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 3);
    11694                 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 3);
    11695                 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
    11696                                                             ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
    11697 
    11698                 iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
    11699                 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
    11700 
    11701                 /* Advance and loop. */
    11702                 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, 0x20);
    11703                 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc2Ptr, idxRegSrc2Ptr, 0x20);
    11704                 pu32CodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegLoop, idxRegLoop, 1, false /*f64Bit*/, true /*fSetFlags*/);
    11705                 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, (int32_t)offLoopStart - (int32_t)off);
    11706 
    11707                 iemNativeRegFreeTmp(pReNative, idxRegLoop);
    11708             }
    11709 
    11710             /* Deal with any remaining dwords (uint64_t).  There can be up to
    11711                three if we looped and four if we didn't. */
    11712             uint32_t offSrc2 = 0;
    11713             while (cbLeft >= 8)
    11714             {
    11715                 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val,
    11716                                                               idxRegSrc1Ptr, offPage / 8);
    11717                 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val,
    11718                                                               idxRegSrc2Ptr, offSrc2 / 8);
    11719                 if (fPendingJmp)
    11720                     pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
    11721                                                                 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
    11722                 else
    11723                 {
    11724                     pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
    11725                     fPendingJmp = true;
    11726                 }
    11727                 pbOpcodes += 8;
    11728                 offPage   += 8;
    11729                 offSrc2   += 8;
    11730                 cbLeft    -= 8;
    11731             }
    11732 
    11733             iemNativeRegFreeTmp(pReNative, idxRegSrc2Ptr);
    11734             /* max cost thus far: memcmp-loop=43 vs memcmp-no-loop=30 */
    11735         }
    11736         /*
    11737          * Otherwise, we compare with constants and merge with the general mop-up.
    11738          */
    11739         else
    11740         {
    11741             while (cbLeft >= 8)
    11742             {
    11743                 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr,
    11744                                                               offPage / 8);
    11745                 off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Val,
    11746                                                 RT_MAKE_U64_FROM_MSB_U8(pbOpcodes[7], pbOpcodes[6], pbOpcodes[5], pbOpcodes[4],
    11747                                                                         pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
    11748                 if (fPendingJmp)
    11749                     pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
    11750                                                                 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, true /*f64Bit*/);
    11751                 else
    11752                 {
    11753                     pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, true /*f64Bit*/);
    11754                     fPendingJmp = true;
    11755                 }
    11756                 pbOpcodes += 8;
    11757                 offPage   += 8;
    11758                 cbLeft    -= 8;
    11759             }
    11760             /* max cost thus far: 21 */
    11761         }
    11762 
    11763         /* Deal with any remaining bytes (7 or less). */
    11764         Assert(cbLeft < 8);
    11765         if (cbLeft >= 4)
    11766         {
    11767             pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr,
    11768                                                           offPage / 4);
    11769             off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
    11770                                               RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
    11771             if (fPendingJmp)
    11772                 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
    11773                                                             ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
    11774             else
    11775             {
    11776                 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
    11777                 fPendingJmp = true;
    11778             }
    11779             pbOpcodes += 4;
    11780             offPage   += 4;
    11781             cbLeft    -= 4;
    11782 
    11783         }
    11784 
    11785         if (cbLeft >= 2)
    11786         {
    11787             pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr,
    11788                                                           offPage / 2);
    11789             pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
    11790             if (fPendingJmp)
    11791                 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
    11792                                                             ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
    11793             else
    11794             {
    11795                 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
    11796                 fPendingJmp = true;
    11797             }
    11798             pbOpcodes += 2;
    11799             offPage   += 2;
    11800             cbLeft    -= 2;
    11801         }
    11802 
    11803         if (cbLeft > 0)
    11804         {
    11805             Assert(cbLeft == 1);
    11806             pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
    11807             if (fPendingJmp)
    11808             {
    11809                 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, pbOpcodes[0]);
    11810                 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
    11811                                                             ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
    11812             }
    11813             else
    11814             {
    11815                 pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, pbOpcodes[0], false /*f64Bit*/);
    11816                 fPendingJmp = true;
    11817             }
    11818             pbOpcodes += 1;
    11819             offPage   += 1;
    11820             cbLeft    -= 1;
    11821         }
    11822 
    11823         iemNativeRegFreeTmp(pReNative, idxRegSrc2Val);
    11824     }
    11825     Assert(cbLeft == 0);
    11826 
    11827     /*
    11828      * Finally, the branch on difference.
    11829      */
    11830     if (fPendingJmp)
    11831     {
    11832         iemNativeAddFixup(pReNative, off, idxLabelObsoleteTb, kIemNativeFixupType_RelImm19At5);
    11833         pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, 0);
    11834     }
    11835     RT_NOREF(pu32CodeBuf, cbLeft, offPage, pbOpcodes, offConsolidatedJump, idxLabelObsoleteTb);
    11836 
    11837     /* max costs: memcmp-loop=54; memcmp-no-loop=41; only-src1-ptr=32 */
    11838     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11839     iemNativeRegFreeTmp(pReNative, idxRegSrc1Val);
    11840     iemNativeRegFreeTmp(pReNative, idxRegSrc1Ptr);
    11841 
    11842 #else
    11843 # error "Port me"
    11844 #endif
    11845     return off;
    11846 }
    11847 
    11848 
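For reference, the native code emitted above is functionally equivalent to the following plain-C check; this is a minimal sketch, assuming pbInstrBuf points at the currently mapped guest code page and pbOpcodes at the opcode bytes recorded in the TB, and the helper name is illustrative only (the real code branches to the obsolete-TB exit instead of returning a value):

    #include <stdint.h>
    #include <string.h>

    /* Returns 0 if the TB's recorded opcode bytes still match guest memory at
       pbInstrBuf + offPage, non-zero if the TB must be treated as obsolete. */
    static int iemSketchCheckOpcodes(uint8_t const *pbInstrBuf, uint32_t offPage,
                                     uint8_t const *pbOpcodes, uint32_t cbLeft)
    {
        return memcmp(&pbInstrBuf[offPage], pbOpcodes, cbLeft) != 0;
    }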
    11849 #ifdef BODY_CHECK_CS_LIM
    11850 /**
    11851  * Built-in function that checks that EIP/IP + uParam0 is within CS.LIM,
    11852  * raising a \#GP(0) if this isn't the case.
    11853  */
    11854 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLim)
    11855 {
    11856     uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];
    11857     BODY_SET_CUR_INSTR();
    11858     BODY_CHECK_CS_LIM(cbInstr);
    11859     return off;
    11860 }
    11861 #endif
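A minimal sketch of the limit check that BODY_CHECK_CS_LIM stands for, assuming the usual rule that the last byte of the instruction must lie at or below CS.LIM; the helper name is illustrative and the real emitter raises #GP(0) through a TB exit rather than returning a value:

    #include <stdint.h>

    /* Returns 0 if the instruction at uIp with length cbInstr fits within the
       CS limit, non-zero if a #GP(0) would have to be raised. */
    static int iemSketchCheckCsLim(uint32_t uIp, uint32_t cbInstr, uint32_t uCsLimit)
    {
        return uIp + cbInstr - 1 > uCsLimit;
    }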
    11862 
    11863 
    11864 #if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_CS_LIM)
    11865 /**
    11866  * Built-in function for re-checking opcodes and CS.LIM after an instruction
    11867  * that may have modified them.
    11868  */
    11869 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodes)
    11870 {
    11871     PCIEMTB const  pTb      = pReNative->pTbOrg;
    11872     uint32_t const cbInstr  = (uint32_t)pCallEntry->auParams[0];
    11873     uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
    11874     uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
    11875     BODY_SET_CUR_INSTR();
    11876     BODY_CHECK_CS_LIM(cbInstr);
    11877     BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
    11878     return off;
    11879 }
    11880 #endif
    11881 
    11882 
    11883 #if defined(BODY_CHECK_OPCODES)
    11884 /**
    11885  * Built-in function for re-checking opcodes after an instruction that may have
    11886  * modified them.
    11887  */
    11888 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodes)
    11889 {
    11890     PCIEMTB const  pTb      = pReNative->pTbOrg;
    11891     uint32_t const cbInstr  = (uint32_t)pCallEntry->auParams[0];
    11892     uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
    11893     uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
    11894     BODY_SET_CUR_INSTR();
    11895     BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
    11896     return off;
    11897 }
    11898 #endif
    11899 
    11900 
    11901 #if defined(BODY_CHECK_OPCODES) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
    11902 /**
    11903  * Built-in function for re-checking opcodes and considering the need for CS.LIM
    11904  * checking after an instruction that may have modified them.
    11905  */
    11906 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesConsiderCsLim)
    11907 {
    11908     PCIEMTB const  pTb      = pReNative->pTbOrg;
    11909     uint32_t const cbInstr  = (uint32_t)pCallEntry->auParams[0];
    11910     uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
    11911     uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
    11912     BODY_SET_CUR_INSTR();
    11913     BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
    11914     BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
    11915     return off;
    11916 }
    11917 #endif
    11918 
    11919 
    11920 /*
    11921  * Post-branching checkers.
    11922  */
    11923 
    11924 #if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
    11925 /**
    11926  * Built-in function for checking CS.LIM, checking the PC and checking opcodes
    11927  * after conditional branching within the same page.
    11928  *
    11929  * @see iemThreadedFunc_BltIn_CheckPcAndOpcodes
    11930  */
    11931 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndPcAndOpcodes)
    11932 {
    11933     PCIEMTB const  pTb      = pReNative->pTbOrg;
    11934     uint32_t const cbInstr  = (uint32_t)pCallEntry->auParams[0];
    11935     uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
    11936     uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
    11937     //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
    11938     BODY_SET_CUR_INSTR();
    11939     BODY_CHECK_CS_LIM(cbInstr);
    11940     BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, cbInstr);
    11941     BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
    11942     //LogFunc(("okay\n"));
    11943     return off;
    11944 }
    11945 #endif
    11946 
    11947 
    11948 #if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH)
    11949 /**
    11950  * Built-in function for checking the PC and checking opcodes after conditional
    11951  * branching within the same page.
    11952  *
    11953  * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
    11954  */
    11955 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodes)
    11956 {
    11957     PCIEMTB const  pTb      = pReNative->pTbOrg;
    11958     uint32_t const cbInstr  = (uint32_t)pCallEntry->auParams[0];
    11959     uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
    11960     uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
    11961     //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
    11962     BODY_SET_CUR_INSTR();
    11963     BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, cbInstr);
    11964     BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
    11965     //LogFunc(("okay\n"));
    11966     return off;
    11967 }
    11968 #endif
    11969 
    11970 
    11971 #if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
    11972 /**
    11973  * Built-in function for checking the PC and checking opcodes and considering
    11974  * the need for CS.LIM checking after conditional branching within the same
    11975  * page.
    11976  *
    11977  * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
    11978  */
    11979 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodesConsiderCsLim)
    11980 {
    11981     PCIEMTB const  pTb      = pReNative->pTbOrg;
    11982     uint32_t const cbInstr  = (uint32_t)pCallEntry->auParams[0];
    11983     uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
    11984     uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
    11985     //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
    11986     BODY_SET_CUR_INSTR();
    11987     BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
    11988     BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, cbInstr);
    11989     BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
    11990     //LogFunc(("okay\n"));
    11991     return off;
    11992 }
    11993 #endif
    11994 
    11995 
    11996 #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
    11997 /**
    11998  * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
    11999  * transitioning to a different code page.
    12000  *
    12001  * The code page transition can either be natural, flowing over onto the next page
    12002  * (with the instruction starting at page offset zero), or happen by means of branching.
    12003  *
    12004  * @see iemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
    12005  */
    12006 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb)
    12007 {
    12008     PCIEMTB const  pTb      = pReNative->pTbOrg;
    12009     uint32_t const cbInstr  = (uint32_t)pCallEntry->auParams[0];
    12010     uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
    12011     uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
    12012     //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
    12013     BODY_SET_CUR_INSTR();
    12014     BODY_CHECK_CS_LIM(cbInstr);
    12015     BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
    12016     BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
    12017     //LogFunc(("okay\n"));
    12018     return off;
    12019 }
    12020 #endif
    12021 
    12022 
    12023 #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH)
    12024 /**
    12025  * Built-in function for loading TLB and checking opcodes when transitioning to
    12026  * a different code page.
    12027  *
    12028  * The code page transition can either be natural, flowing over onto the next page
    12029  * (with the instruction starting at page offset zero), or happen by means of branching.
    12030  *
    12031  * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
    12032  */
    12033 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlb)
    12034 {
    12035     PCIEMTB const  pTb      = pReNative->pTbOrg;
    12036     uint32_t const cbInstr  = (uint32_t)pCallEntry->auParams[0];
    12037     uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
    12038     uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
    12039     //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
    12040     BODY_SET_CUR_INSTR();
    12041     BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
    12042     BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
    12043     //LogFunc(("okay\n"));
    12044     return off;
    12045 }
    12046 #endif
    12047 
    12048 
    12049 #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
    12050 /**
    12051  * Built-in function for loading TLB and checking opcodes and considering the
    12052  * need for CS.LIM checking when transitioning to a different code page.
    12053  *
    12054  * The code page transition can either be natural, flowing over onto the next page
    12055  * (with the instruction starting at page offset zero), or happen by means of branching.
    12056  *
    12057  * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
    12058  */
    12059 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim)
    12060 {
    12061     PCIEMTB const  pTb      = pReNative->pTbOrg;
    12062     uint32_t const cbInstr  = (uint32_t)pCallEntry->auParams[0];
    12063     uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
    12064     uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
    12065     //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
    12066     BODY_SET_CUR_INSTR();
    12067     BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
    12068     BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
    12069     BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
    12070     //LogFunc(("okay\n"));
    12071     return off;
    12072 }
    12073 #endif
    12074 
    12075 
    12076 
    12077 /*
    12078  * Natural page crossing checkers.
    12079  */
    12080 
    12081 #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
    12082 /**
    12083  * Built-in function for checking CS.LIM, loading TLB and checking opcodes on
    12084  * both pages when transitioning to a different code page.
    12085  *
    12086  * This is used when the previous instruction requires revalidation of the
    12087  * opcode bytes and the current instruction straddles a page boundary with
    12088  * opcode bytes in both the old and the new page.
    12089  *
    12090  * @see iemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
    12091  */
    12092 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb)
    12093 {
    12094     PCIEMTB const  pTb         = pReNative->pTbOrg;
    12095     uint32_t const cbInstr     = (uint32_t)pCallEntry->auParams[0];
    12096     uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
    12097     uint32_t const idxRange1   = (uint32_t)pCallEntry->auParams[1];
    12098     uint32_t const offRange1   = (uint32_t)pCallEntry->auParams[2];
    12099     uint32_t const idxRange2   = idxRange1 + 1;
    12100     BODY_SET_CUR_INSTR();
    12101     BODY_CHECK_CS_LIM(cbInstr);
    12102     BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
    12103     BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
    12104     BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
    12105     return off;
    12106 }
    12107 #endif
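The page-crossing variants here unpack a single 64-bit auParams[0] into cbInstr (low 32 bits) and cbStartPage, the number of instruction bytes on the first page (high 32 bits). As a purely illustrative sketch of that packing from the caller's side (not a function from the sources):

    #include <stdint.h>

    /* Pack cbInstr into the low dword and cbStartPage into the high dword of a
       single 64-bit threaded-call parameter. */
    static uint64_t iemSketchPackCrossPageParam0(uint32_t cbInstr, uint32_t cbStartPage)
    {
        return (uint64_t)cbStartPage << 32 | cbInstr;
    }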
    12108 
    12109 
    12110 #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
    12111 /**
    12112  * Built-in function for loading TLB and checking opcodes on both pages when
    12113  * transitioning to a different code page.
    12114  *
    12115  * This is used when the previous instruction requires revalidation of the
    12116  * opcode bytes and the current instruction straddles a page boundary with
    12117  * opcode bytes in both the old and the new page.
    12118  *
    12119  * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
    12120  */
    12121 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb)
    12122 {
    12123     PCIEMTB const  pTb         = pReNative->pTbOrg;
    12124     uint32_t const cbInstr     = (uint32_t)pCallEntry->auParams[0];
    12125     uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
    12126     uint32_t const idxRange1   = (uint32_t)pCallEntry->auParams[1];
    12127     uint32_t const offRange1   = (uint32_t)pCallEntry->auParams[2];
    12128     uint32_t const idxRange2   = idxRange1 + 1;
    12129     BODY_SET_CUR_INSTR();
    12130     BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
    12131     BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
    12132     BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
    12133     return off;
    12134 }
    12135 #endif
    12136 
    12137 
    12138 #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
    12139 /**
    12140  * Built-in function for loading TLB and checking opcodes on both pages and
    12141  * considering the need for CS.LIM checking when transitioning to a different
    12142  * code page.
    12143  *
    12144  * This is used when the previous instruction requires revalidation of the
    12145  * opcode bytes and the current instruction straddles a page boundary with
    12146  * opcode bytes in both the old and the new page.
    12147  *
    12148  * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
    12149  */
    12150 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim)
    12151 {
    12152     PCIEMTB const  pTb         = pReNative->pTbOrg;
    12153     uint32_t const cbInstr     = (uint32_t)pCallEntry->auParams[0];
    12154     uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
    12155     uint32_t const idxRange1   = (uint32_t)pCallEntry->auParams[1];
    12156     uint32_t const offRange1   = (uint32_t)pCallEntry->auParams[2];
    12157     uint32_t const idxRange2   = idxRange1 + 1;
    12158     BODY_SET_CUR_INSTR();
    12159     BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
    12160     BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
    12161     BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
    12162     BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
    12163     return off;
    12164 }
    12165 #endif
    12166 
    12167 
    12168 #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
    12169 /**
    12170  * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
    12171  * advancing naturally to a different code page.
    12172  *
    12173  * Only opcodes on the new page are checked.
    12174  *
    12175  * @see iemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
    12176  */
    12177 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb)
    12178 {
    12179     PCIEMTB const  pTb         = pReNative->pTbOrg;
    12180     uint32_t const cbInstr     = (uint32_t)pCallEntry->auParams[0];
    12181     uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
    12182     uint32_t const idxRange1   = (uint32_t)pCallEntry->auParams[1];
    12183     //uint32_t const offRange1   = (uint32_t)uParam2;
    12184     uint32_t const idxRange2   = idxRange1 + 1;
    12185     BODY_SET_CUR_INSTR();
    12186     BODY_CHECK_CS_LIM(cbInstr);
    12187     BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
    12188     BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
    12189     return off;
    12190 }
    12191 #endif
    12192 
    12193 
    12194 #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
    12195 /**
    12196  * Built-in function for loading TLB and checking opcodes when advancing
    12197  * naturally to a different code page.
    12198  *
    12199  * Only opcodes on the new page are checked.
    12200  *
    12201  * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
    12202  */
    12203 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb)
    12204 {
    12205     PCIEMTB const  pTb         = pReNative->pTbOrg;
    12206     uint32_t const cbInstr     = (uint32_t)pCallEntry->auParams[0];
    12207     uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
    12208     uint32_t const idxRange1   = (uint32_t)pCallEntry->auParams[1];
    12209     //uint32_t const offRange1   = (uint32_t)pCallEntry->auParams[2];
    12210     uint32_t const idxRange2   = idxRange1 + 1;
    12211     BODY_SET_CUR_INSTR();
    12212     BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
    12213     BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
    12214     return off;
    12215 }
    12216 #endif
    12217 
    12218 
    12219 #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
    12220 /**
    12221  * Built-in function for loading TLB and checking opcodes and considering the
    12222  * need for CS.LIM checking when advancing naturally to a different code page.
    12223  *
    12224  * Only opcodes on the new page are checked.
    12225  *
    12226  * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
    12227  */
    12228 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim)
    12229 {
    12230     PCIEMTB const  pTb         = pReNative->pTbOrg;
    12231     uint32_t const cbInstr     = (uint32_t)pCallEntry->auParams[0];
    12232     uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
    12233     uint32_t const idxRange1   = (uint32_t)pCallEntry->auParams[1];
    12234     //uint32_t const offRange1   = (uint32_t)pCallEntry->auParams[2];
    12235     uint32_t const idxRange2   = idxRange1 + 1;
    12236     BODY_SET_CUR_INSTR();
    12237     BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
    12238     BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
    12239     BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
    12240     return off;
    12241 }
    12242 #endif
    12243 
    12244 
    12245 #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
    12246 /**
    12247  * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
    12248  * advancing naturally to a different code page with first instr at byte 0.
    12249  *
    12250  * @see iemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
    12251  */
    12252 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb)
    12253 {
    12254     PCIEMTB const  pTb         = pReNative->pTbOrg;
    12255     uint32_t const cbInstr     = (uint32_t)pCallEntry->auParams[0];
    12256     uint32_t const idxRange    = (uint32_t)pCallEntry->auParams[1];
    12257     BODY_SET_CUR_INSTR();
    12258     BODY_CHECK_CS_LIM(cbInstr);
    12259     BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
    12260     //Assert(pVCpu->iem.s.offCurInstrStart == 0);
    12261     BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
    12262     return off;
    12263 }
    12264 #endif
    12265 
    12266 
    12267 #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
    12268 /**
    12269  * Built-in function for loading TLB and checking opcodes when advancing
    12270  * naturally to a different code page with first instr at byte 0.
    12271  *
    12272  * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
    12273  */
    12274 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb)
    12275 {
    12276     PCIEMTB const  pTb         = pReNative->pTbOrg;
    12277     uint32_t const cbInstr     = (uint32_t)pCallEntry->auParams[0];
    12278     uint32_t const idxRange    = (uint32_t)pCallEntry->auParams[1];
    12279     BODY_SET_CUR_INSTR();
    12280     BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
    12281     //Assert(pVCpu->iem.s.offCurInstrStart == 0);
    12282     BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
    12283     return off;
    12284 }
    12285 #endif
    12286 
    12287 
    12288 #if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
    12289 /**
    12290  * Built-in function for loading TLB and checking opcodes and considering the
    12291  * need for CS.LIM checking when advancing naturally to a different code page
    12292  * with first instr at byte 0.
    12293  *
    12294  * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
    12295  */
    12296 static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim)
    12297 {
    12298     PCIEMTB const  pTb         = pReNative->pTbOrg;
    12299     uint32_t const cbInstr     = (uint32_t)pCallEntry->auParams[0];
    12300     uint32_t const idxRange    = (uint32_t)pCallEntry->auParams[1];
    12301     BODY_SET_CUR_INSTR();
    12302     BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
    12303     BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
    12304     //Assert(pVCpu->iem.s.offCurInstrStart == 0);
    12305     BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
    12306     return off;
    12307 }
    12308 #endif
    1230911043
    1231011044
  • trunk/src/VBox/VMM/include/IEMN8veRecompiler.h

    r102624 r102634  
    834834                                                                 uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg);
    835835DECL_HIDDEN_THROW(uint32_t) iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr);
     836DECL_HIDDEN_THROW(uint32_t) iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
     837                                                   uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
     838                                                   uint64_t uParam0, uint64_t uParam1, uint64_t uParam2);
     839
     840IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_DeferToCImpl0);
     841IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckIrq);
     842IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckMode);
     843IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckCsLim);
     844IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodes);
     845IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckOpcodes);
     846IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckOpcodesConsiderCsLim);
     847IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckCsLimAndPcAndOpcodes);
     848IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckPcAndOpcodes);
     849IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckPcAndOpcodesConsiderCsLim);
     850IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb);
     851IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlb);
     852IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim);
     853IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb);
     854IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb);
     855IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim);
     856IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb);
     857IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb);
     858IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim);
     859IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb);
     860IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb);
     861IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim);
    836862
    837863extern DECL_HIDDEN_DATA(const char * const) g_apszIemNativeHstRegNames[];