VirtualBox

Changeset 103807 in vbox


Timestamp: Mar 12, 2024 7:43:31 PM
Author: vboxsync
svn:sync-xref-src-repo-rev: 162176
Message:

VMM/IEM: Split up the native recompiler functions (IEMNativeFunctions.cpp.h) into 4 files to speed up compilation and reduce compiler memory consumption. This involved splitting out half the content of IEMAllThrdRecompiler.cpp into IEMAllN8veRecompFuncs.h and IEMN8veRecompiler.h. bugref:10371
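
For context, the four new thin translation units presumably follow the usual split-include pattern: each pulls in the shared recompiler state and inlined emitters once, plus one quarter of the generated function bodies, so four compiler jobs each carry roughly a quarter of the old single-TU workload. A hypothetical sketch of one such file (illustrative only; the part-selector macro is made up):

    /* IEMAllN8veRecompFuncs1.cpp - hypothetical sketch, part 1 of 4 */
    #define IEM_NATIVE_RECOMP_PART 1            /* made-up selector for this quarter */
    #include "IEMN8veRecompiler.h"              /* shared state/types split out by this change */
    #include "IEMAllN8veRecompFuncs.h"          /* shared inlined emitter bits */
    #include "IEMNativeFunctions1.cpp.h"        /* generated: first quarter of the functions */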

Location: trunk/src/VBox/VMM
Files: 4 added, 4 edited, 1 copied

  • trunk/src/VBox/VMM/Makefile.kmk

r103769 → r103807

@@ -263 +263 @@
  VBoxVMM_SOURCES += \
        VMMAll/IEMAllN8veRecompiler.cpp \
+       VMMAll/IEMAllN8veRecompFuncs1.cpp \
+       VMMAll/IEMAllN8veRecompFuncs2.cpp \
+       VMMAll/IEMAllN8veRecompFuncs3.cpp \
+       VMMAll/IEMAllN8veRecompFuncs4.cpp \
        VMMAll/IEMAllN8veRecompBltIn.cpp \
        VMMAll/IEMAllN8veLiveness.cpp
     
@@ -597 +601 @@
  VBoxVMM_INTERMEDIATES += \
        $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions.h \
-       $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions.cpp.h \
+       $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions1.cpp.h \
+       $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions2.cpp.h \
+       $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions3.cpp.h \
+       $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions4.cpp.h \
        $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeLiveness.cpp.h
 endif
     
@@ -609 +616 @@
        $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMThreadedInstructions4.cpp.h \
        $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions.h \
-       $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions.cpp.h \
+       $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions1.cpp.h \
+       $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions2.cpp.h \
+       $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions3.cpp.h \
+       $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions4.cpp.h \
        $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeLiveness.cpp.h
 $(call KB_FN_AUTO_CMD_DEPS,$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMThreadedFunctions.h.ts)
     
@@ -643 +653 @@
                "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMThreadedInstructions4.cpp.h.ts" \
                "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMThreadedFunctions.cpp.h.ts" \
-               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMThreadedFunctions.h.ts"
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions.h.ts" \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions1.cpp.h.ts" \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions2.cpp.h.ts" \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions3.cpp.h.ts" \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions4.cpp.h.ts" \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeLiveness.cpp.h.ts"
        $(QUIET)$(MKDIR) -p -- "$(dir $@)"
        $(call KB_FN_AUTO_CMD_DEPS_COMMANDS)
     
@@ -657 +672 @@
                        --native \
                        --out-n8ve-funcs-hdr    "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions.h.ts" \
-                       --out-n8ve-funcs-cpp    "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions.cpp.h.ts" \
+                       --out-n8ve-funcs-cpp1   "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions1.cpp.h.ts" \
+                       --out-n8ve-funcs-cpp2   "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions2.cpp.h.ts" \
+                       --out-n8ve-funcs-cpp3   "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions3.cpp.h.ts" \
+                       --out-n8ve-funcs-cpp4   "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions4.cpp.h.ts" \
                        --out-n8ve-liveness-cpp "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeLiveness.cpp.h.ts" \
                ,)
     
@@ -683 +701 @@
                "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions.h"
        $(QUIET)$(CP) -v -f --changed -- \
-               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions.cpp.h.ts" \
-               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions.cpp.h"
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions1.cpp.h.ts" \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions1.cpp.h"
+       $(QUIET)$(CP) -v -f --changed -- \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions2.cpp.h.ts" \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions2.cpp.h"
+       $(QUIET)$(CP) -v -f --changed -- \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions3.cpp.h.ts" \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions3.cpp.h"
+       $(QUIET)$(CP) -v -f --changed -- \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions4.cpp.h.ts" \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions4.cpp.h"
        $(QUIET)$(CP) -v -f --changed -- \
                "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeLiveness.cpp.h.ts" \
     
@@ -696 +723 @@
                "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMThreadedFunctions.cpp.h.ts" \
                "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions.h.ts" \
-               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions.cpp.h.ts"
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions1.cpp.h.ts" \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions2.cpp.h.ts" \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions3.cpp.h.ts" \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeFunctions4.cpp.h.ts" \
+               "$(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMNativeLiveness.cpp.h.ts"
 
 foobared: $(VBoxVMM_0_OUTDIR)/CommonGenIncs/IEMThreadedFunctions.h.ts
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h

r103801 → r103807
@@ -1 +1 @@
 /* $Id$ */
 /** @file
- * IEM - Native Recompiler
- *
- * Logging group IEM_RE_NATIVE assignments:
- *      - Level 1  (Log)  : ...
- *      - Flow  (LogFlow) : ...
- *      - Level 2  (Log2) : Details calls as they're recompiled.
- *      - Level 3  (Log3) : Disassemble native code after recompiling.
- *      - Level 4  (Log4) : ...
- *      - Level 5  (Log5) : ...
- *      - Level 6  (Log6) : ...
- *      - Level 7  (Log7) : ...
- *      - Level 8  (Log8) : ...
- *      - Level 9  (Log9) : ...
- *      - Level 10 (Log10): ...
- *      - Level 11 (Log11): Variable allocator.
- *      - Level 12 (Log12): Register allocator.
+ * IEM - Native Recompiler - Inlined Bits.
  */
 
     
@@ -49 +34 @@
 #define VMCPU_INCL_CPUM_GST_CTX
 #define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
+#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
 #include <VBox/vmm/iem.h>
 #include <VBox/vmm/cpum.h>
     
@@ -68 +54 @@
 #endif
 
-#ifdef RT_OS_WINDOWS
-# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
-extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
-extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
-#else
-# include <iprt/formats/dwarf.h>
-# if defined(RT_OS_DARWIN)
-#  include <libkern/OSCacheControl.h>
-#  define IEMNATIVE_USE_LIBUNWIND
-extern "C" void  __register_frame(const void *pvFde);
-extern "C" void  __deregister_frame(const void *pvFde);
-# else
-#  ifdef DEBUG_bird /** @todo not thread safe yet */
-#   define IEMNATIVE_USE_GDB_JIT
-#  endif
-#  ifdef IEMNATIVE_USE_GDB_JIT
-#   include <iprt/critsect.h>
-#   include <iprt/once.h>
-#   include <iprt/formats/elf64.h>
-#  endif
-extern "C" void  __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
-extern "C" void *__deregister_frame_info(void *pvBegin);           /* (returns pvObj from __register_frame_info call) */
-# endif
-#endif
-#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
-# include "/opt/local/include/capstone/capstone.h"
-#endif
-
 #include "IEMInline.h"
 #include "IEMThreadedFunctions.h"
     
@@ -121 +79 @@
 #endif
 
-/** @todo eliminate this clang build hack. */
-#if RT_CLANG_PREREQ(4, 0)
-# pragma GCC diagnostic ignored "-Wunused-function"
-#endif
 
 
 /*********************************************************************************************************************************
-*   Internal Functions                                                                                                           *
+*   Code emitters for flushing pending guest register writes and sanity checks                                                   *
 *********************************************************************************************************************************/
-#ifdef VBOX_STRICT
-static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
-                                                uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
-# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
-static uint32_t iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
-                                                    IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz);
-# endif
-static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
-#endif
-#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
-static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
-static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
-#endif
-DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
-DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
-                                                            IEMNATIVEGSTREG enmGstReg, uint32_t off);
-DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
-
-
-/*********************************************************************************************************************************
-*   Executable Memory Allocator                                                                                                  *
-*********************************************************************************************************************************/
-/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
- * Use an alternative chunk sub-allocator that does not store internal data
- * in the chunk.
- *
- * Using RTHeapSimple is not practical on newer darwin systems where
- * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
- * memory.  We would have to change the protection of the whole chunk for
- * every call to RTHeapSimple, which would be rather expensive.
- *
- * This alternative implementation restricts page protection modifications
- * to the pages backing the executable memory we just allocated.
- */
-#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
-/** The chunk sub-allocation unit size in bytes. */
-#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE      128
-/** The chunk sub-allocation unit size as a shift factor. */
-#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT     7
-
-#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
-# ifdef IEMNATIVE_USE_GDB_JIT
-#   define IEMNATIVE_USE_GDB_JIT_ET_DYN
-
-/** GDB JIT: Code entry.   */
-typedef struct GDBJITCODEENTRY
-{
-    struct GDBJITCODEENTRY *pNext;
-    struct GDBJITCODEENTRY *pPrev;
-    uint8_t                *pbSymFile;
-    uint64_t                cbSymFile;
-} GDBJITCODEENTRY;
-
-/** GDB JIT: Actions. */
-typedef enum GDBJITACTIONS : uint32_t
-{
-    kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
-} GDBJITACTIONS;
-
-/** GDB JIT: Descriptor. */
-typedef struct GDBJITDESCRIPTOR
-{
-    uint32_t            uVersion;
-    GDBJITACTIONS       enmAction;
-    GDBJITCODEENTRY    *pRelevant;
-    GDBJITCODEENTRY    *pHead;
-    /** Our addition: */
-    GDBJITCODEENTRY    *pTail;
-} GDBJITDESCRIPTOR;
-
-/** GDB JIT: Our simple symbol file data. */
-typedef struct GDBJITSYMFILE
-{
-    Elf64_Ehdr          EHdr;
-#  ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
-    Elf64_Shdr          aShdrs[5];
-#  else
-    Elf64_Shdr          aShdrs[7];
-    Elf64_Phdr          aPhdrs[2];
-#  endif
-    /** The dwarf ehframe data for the chunk. */
-    uint8_t             abEhFrame[512];
-    char                szzStrTab[128];
-    Elf64_Sym           aSymbols[3];
-#  ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
-    Elf64_Sym           aDynSyms[2];
-    Elf64_Dyn           aDyn[6];
-#  endif
-} GDBJITSYMFILE;
-
-extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
-extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
-
-/** Init once for g_IemNativeGdbJitLock. */
-static RTONCE     g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
-/** The GDB JIT critical section. */
-static RTCRITSECT g_IemNativeGdbJitLock;
-
-/** GDB reads the info here. */
-GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
-
-/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
-DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
-{
-    ASMNopPause();
-}
-
-/** @callback_method_impl{FNRTONCE} */
-static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
-{
-    RT_NOREF(pvUser);
-    return RTCritSectInit(&g_IemNativeGdbJitLock);
-}
-
-
-# endif /* IEMNATIVE_USE_GDB_JIT */
-
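
For background, the __jit_debug_descriptor/__jit_debug_register_code pair above implements GDB's in-process JIT interface: the JIT links a GDBJITCODEENTRY describing an in-memory ELF symbol file into the descriptor's list, marks the action, and calls __jit_debug_register_code(), on which GDB keeps a breakpoint. A minimal sketch of a registration using only the types above (illustrative; the real code also serializes through g_IemNativeGdbJitLock):

    static void iemNativeGdbJitRegisterSketch(GDBJITCODEENTRY *pEntry, uint8_t *pbSymFile, uint64_t cbSymFile)
    {
        pEntry->pbSymFile = pbSymFile;              /* the in-memory ELF image GDB should read */
        pEntry->cbSymFile = cbSymFile;
        pEntry->pNext     = NULL;                   /* append at the tail of the doubly linked list */
        pEntry->pPrev     = __jit_debug_descriptor.pTail;
        if (__jit_debug_descriptor.pTail)
            __jit_debug_descriptor.pTail->pNext = pEntry;
        else
            __jit_debug_descriptor.pHead = pEntry;
        __jit_debug_descriptor.pTail     = pEntry;  /* the pTail member is VBox's own addition */
        __jit_debug_descriptor.pRelevant = pEntry;  /* tell GDB which entry changed */
        __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
        __jit_debug_register_code();                /* GDB's breakpoint fires here */
    }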
-/**
- * Per-chunk unwind info for non-windows hosts.
- */
-typedef struct IEMEXECMEMCHUNKEHFRAME
-{
-# ifdef IEMNATIVE_USE_LIBUNWIND
-    /** The offset of the FDA into abEhFrame. */
-    uintptr_t               offFda;
-# else
-    /** 'struct object' storage area. */
-    uint8_t                 abObject[1024];
-# endif
-#  ifdef IEMNATIVE_USE_GDB_JIT
-#   if 0
-    /** The GDB JIT 'symbol file' data. */
-    GDBJITSYMFILE           GdbJitSymFile;
-#   endif
-    /** The GDB JIT list entry. */
-    GDBJITCODEENTRY         GdbJitEntry;
-#  endif
-    /** The dwarf ehframe data for the chunk. */
-    uint8_t                 abEhFrame[512];
-} IEMEXECMEMCHUNKEHFRAME;
-/** Pointer to per-chunk info for non-windows hosts. */
-typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
-#endif
-
-
-/**
- * A chunk of executable memory.
- */
-typedef struct IEMEXECMEMCHUNK
-{
-#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
-    /** Number of free items in this chunk. */
-    uint32_t                cFreeUnits;
-    /** Hint where to start searching for free space in the allocation bitmap. */
-    uint32_t                idxFreeHint;
-#else
-    /** The heap handle. */
-    RTHEAPSIMPLE            hHeap;
-#endif
-    /** Pointer to the chunk. */
-    void                   *pvChunk;
-#ifdef IN_RING3
-    /**
-     * Pointer to the unwind information.
-     *
-     * This is used during C++ throw and longjmp (windows and probably most other
-     * platforms).  Some debuggers (windbg) make use of it as well.
-     *
-     * Windows: This is allocated from hHeap on windows because (at least for
-     *          AMD64) the UNWIND_INFO structure address in the
-     *          RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
-     *
-     * Others:  Allocated from the regular heap to avoid unnecessary executable data
-     *          structures.  This points to an IEMEXECMEMCHUNKEHFRAME structure. */
-    void                   *pvUnwindInfo;
-#elif defined(IN_RING0)
-    /** Allocation handle. */
-    RTR0MEMOBJ              hMemObj;
-#endif
-} IEMEXECMEMCHUNK;
-/** Pointer to a memory chunk. */
-typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
-
-
-/**
- * Executable memory allocator for the native recompiler.
- */
-typedef struct IEMEXECMEMALLOCATOR
-{
-    /** Magic value (IEMEXECMEMALLOCATOR_MAGIC).  */
-    uint32_t                uMagic;
-
-    /** The chunk size. */
-    uint32_t                cbChunk;
-    /** The maximum number of chunks. */
-    uint32_t                cMaxChunks;
-    /** The current number of chunks. */
-    uint32_t                cChunks;
-    /** Hint where to start looking for available memory. */
-    uint32_t                idxChunkHint;
-    /** Statistics: Current number of allocations. */
-    uint32_t                cAllocations;
-
-    /** The total amount of memory available. */
-    uint64_t                cbTotal;
-    /** Total amount of free memory. */
-    uint64_t                cbFree;
-    /** Total amount of memory allocated. */
-    uint64_t                cbAllocated;
-
-#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
-    /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
-     *
-     * Since the chunk size is a power of two and the minimum chunk size is a lot
-     * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
-     * require a whole number of uint64_t elements in the allocation bitmap.  So,
-     * for simplicity's sake, they are allocated as one continuous block. */
-    uint64_t               *pbmAlloc;
-    /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
-    uint32_t                cUnitsPerChunk;
-    /** Number of bitmap elements per chunk (for quickly locating the bitmap
-     * portion corresponding to a chunk). */
-    uint32_t                cBitmapElementsPerChunk;
-#else
-    /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
-     * @{ */
-    /** The size of the heap internal block header.   This is used to adjust the
-     * requested memory size to make sure there is exactly enough room for a header at
-     * the end of the blocks we allocate before the next 64 byte alignment line. */
-    uint32_t                cbHeapBlockHdr;
-    /** The size of the initial heap allocation required to make sure the first
-     *  allocation is correctly aligned. */
-    uint32_t                cbHeapAlignTweak;
-    /** The alignment tweak allocation address. */
-    void                   *pvAlignTweak;
-    /** @} */
-#endif
-
-#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
-    /** Pointer to the array of unwind info running parallel to aChunks (same
-     * allocation as this structure, located after the bitmaps).
-     * (For Windows, the structures must reside in 32-bit RVA distance to the
-     * actual chunk, so they are allocated off the chunk.) */
-    PIEMEXECMEMCHUNKEHFRAME paEhFrames;
-#endif
-
-    /** The allocation chunks. */
-    RT_FLEXIBLE_ARRAY_EXTENSION
-    IEMEXECMEMCHUNK         aChunks[RT_FLEXIBLE_ARRAY];
-} IEMEXECMEMALLOCATOR;
-/** Pointer to an executable memory allocator. */
-typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
-
-/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
-#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
-
-
-static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
-
-
-/**
- * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
- * the heap statistics.
- */
-static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
-                                               uint32_t cbReq, uint32_t idxChunk)
-{
-    pExecMemAllocator->cAllocations += 1;
-    pExecMemAllocator->cbAllocated  += cbReq;
-#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
-    pExecMemAllocator->cbFree       -= cbReq;
-#else
-    pExecMemAllocator->cbFree       -= RT_ALIGN_32(cbReq, 64);
-#endif
-    pExecMemAllocator->idxChunkHint  = idxChunk;
-
-#ifdef RT_OS_DARWIN
-    /*
-     * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
-     * on darwin.  So, we mark the pages returned as read+write after alloc and
-     * expect the caller to call iemExecMemAllocatorReadyForUse when done
-     * writing to the allocation.
-     *
-     * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
-     * for details.
-     */
-    /** @todo detect if this is necessary... it wasn't required on 10.15 or
-     *        whatever older version it was. */
-    int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
-    AssertRC(rc);
-#endif
-
-    return pvRet;
-}
-
-
-#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
-static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
-                                                uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
-{
-    /*
-     * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
-     */
-    Assert(!(cToScan & 63));
-    Assert(!(idxFirst & 63));
-    Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
-    pbmAlloc += idxFirst / 64;
-
-    /*
-     * Scan the bitmap for cReqUnits of consecutive clear bits.
-     */
-    /** @todo This can probably be done more efficiently for non-x86 systems. */
-    int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
-    while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
-    {
-        uint32_t idxAddBit = 1;
-        while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
-            idxAddBit++;
-        if (idxAddBit >= cReqUnits)
-        {
-            ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
-
-            PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
-            pChunk->cFreeUnits -= cReqUnits;
-            pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
-
-            void * const pvRet  = (uint8_t *)pChunk->pvChunk
-                                + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
-
-            return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
-                                                    cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
-        }
-
-        iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
-    }
-    return NULL;
-}
-#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
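
To make the unit arithmetic concrete, here is a self-contained sketch of the same first-fit bitmap scan (hypothetical demo code, not part of the changeset): a 1000-byte request rounds up to 8 of the 128-byte sub-allocation units, and the scanner then looks for 8 consecutive clear bits.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t bmAlloc[4] = {0};                          /* 256 units * 128 bytes = 32 KiB region */
        uint32_t const cbReq     = 1000;
        uint32_t const cReqUnits = (cbReq + 128 - 1) >> 7;  /* same rounding as above -> 8 units */

        for (uint32_t iBit = 0; iBit + cReqUnits <= 256; iBit++)
        {
            uint32_t cClear = 0;                            /* count consecutive clear bits at iBit */
            while (   cClear < cReqUnits
                   && !(bmAlloc[(iBit + cClear) / 64] & (UINT64_C(1) << ((iBit + cClear) % 64))))
                cClear++;
            if (cClear == cReqUnits)
            {
                for (uint32_t i = 0; i < cReqUnits; i++)    /* mark the units allocated */
                    bmAlloc[(iBit + i) / 64] |= UINT64_C(1) << ((iBit + i) % 64);
                printf("allocated units %u..%u (%u bytes)\n", iBit, iBit + cReqUnits - 1, cReqUnits * 128);
                return 0;
            }
            iBit += cClear;                                 /* skip past the set bit we ran into */
        }
        return 1;                                           /* no room */
    }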
-
-
-static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
-{
-#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
-    /*
-     * Figure out how much to allocate.
-     */
-    uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
-    if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
-    {
-        uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
-        uint32_t const   idxHint  = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
-        if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
-        {
-            void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
-                                                             pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
-            if (pvRet)
-                return pvRet;
-        }
-        return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
-                                                  RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
-                                                  cReqUnits, idxChunk);
-    }
-#else
-    void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
-    if (pvRet)
-        return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
-#endif
-    return NULL;
-
-}
-
-
-/**
- * Allocates @a cbReq bytes of executable memory.
- *
- * @returns Pointer to the memory, NULL if out of memory or another problem was
- *          encountered.
- * @param   pVCpu   The cross context virtual CPU structure of the calling
- *                  thread.
- * @param   cbReq   How many bytes are required.
- */
-static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
-{
-    PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
-    AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
-    AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
-
-
-    for (unsigned iIteration = 0;; iIteration++)
-    {
-        /*
-         * Adjust the request size so it'll fit the allocator alignment/whatnot.
-         *
-         * For the RTHeapSimple allocator this means to follow the logic described
-         * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
-         * existing chunks if we think we've got sufficient free memory around.
-         *
-         * While for the alternative one we just align it up to a whole unit size.
-         */
-#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
-        cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
-#else
-        cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
-#endif
-        if (cbReq <= pExecMemAllocator->cbFree)
-        {
-            uint32_t const cChunks      = pExecMemAllocator->cChunks;
-            uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
-            for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
-            {
-                void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
-                if (pvRet)
-                    return pvRet;
-            }
-            for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
-            {
-                void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
-                if (pvRet)
-                    return pvRet;
-            }
-        }
-
-        /*
-         * Can we grow it with another chunk?
-         */
-        if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
-        {
-            int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
-            AssertLogRelRCReturn(rc, NULL);
-
-            uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
-            void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
-            if (pvRet)
-                return pvRet;
-            AssertFailed();
-        }
-
-        /*
-         * Try prune native TBs once.
-         */
-        if (iIteration == 0)
-            iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
-        else
-        {
-            /** @todo stats...   */
-            return NULL;
-        }
-    }
-
-}
-
-
-/** This is a hook that we may need later for changing memory protection back
- *  to readonly+exec. */
-static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
-{
-#ifdef RT_OS_DARWIN
-    /* See iemExecMemAllocatorAllocTailCode for the explanation. */
-    int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
-    AssertRC(rc); RT_NOREF(pVCpu);
-
-    /*
-     * Flush the instruction cache:
-     *      https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
-     */
-    /* sys_dcache_flush(pv, cb); - not necessary */
-    sys_icache_invalidate(pv, cb);
-#else
-    RT_NOREF(pVCpu, pv, cb);
-#endif
-}
-
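
The darwin-specific dance above gives allocations a write-then-execute lifecycle. A sketch of the intended calling sequence (illustrative only; pabCode and cbNeeded are hypothetical placeholders for the recompiler's actual emit path):

    uint8_t *pbDst = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbNeeded);
    if (pbDst)
    {
        memcpy(pbDst, pabCode, cbNeeded);                       /* pages are R+W here on darwin */
        iemExecMemAllocatorReadyForUse(pVCpu, pbDst, cbNeeded); /* flip to R+X, invalidate the icache */
        /* the code at pbDst may now be executed */
    }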
-
-/**
- * Frees executable memory.
- */
-void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
-{
-    PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
-    Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
-    Assert(pv);
-#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
-    Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
-#else
-    Assert(!((uintptr_t)pv & 63));
-#endif
-
-    /* Align the size as we did when allocating the block. */
-#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
-    cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
-#else
-    cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
-#endif
-
-    /* Free it / assert sanity. */
-#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
-    uint32_t const cChunks = pExecMemAllocator->cChunks;
-    uint32_t const cbChunk = pExecMemAllocator->cbChunk;
-    bool           fFound  = false;
-    for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
-    {
-        uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
-        fFound = offChunk < cbChunk;
-        if (fFound)
-        {
-#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
-            uint32_t const idxFirst  = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
-            uint32_t const cReqUnits = (uint32_t)cb       >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
-
-            /* Check that it's valid and free it. */
-            uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
-            AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
-            for (uint32_t i = 1; i < cReqUnits; i++)
-                AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
-            ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
-
-            pExecMemAllocator->aChunks[idxChunk].cFreeUnits  += cReqUnits;
-            pExecMemAllocator->aChunks[idxChunk].idxFreeHint  = idxFirst;
-
-            /* Update the stats. */
-            pExecMemAllocator->cbAllocated  -= cb;
-            pExecMemAllocator->cbFree       += cb;
-            pExecMemAllocator->cAllocations -= 1;
-            return;
-#else
-            Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
-            break;
-#endif
-        }
-    }
-# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
-    AssertFailed();
-# else
-    Assert(fFound);
-# endif
-#endif
-
-#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
-    /* Update stats while cb is freshly calculated. */
-    pExecMemAllocator->cbAllocated  -= cb;
-    pExecMemAllocator->cbFree       += RT_ALIGN_Z(cb, 64);
-    pExecMemAllocator->cAllocations -= 1;
-
-    /* Free it. */
-    RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
-#endif
-}
-
-
-
-#ifdef IN_RING3
-# ifdef RT_OS_WINDOWS
-
-/**
- * Initializes the unwind info structures for windows hosts.
- */
-static int
-iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
-                                                     void *pvChunk, uint32_t idxChunk)
-{
-    RT_NOREF(pVCpu);
-
-    /*
-     * The AMD64 unwind opcodes.
-     *
-     * This is a program that starts with RSP after a RET instruction that
-     * ends up in recompiled code, and the operations we describe here will
-     * restore all non-volatile registers and bring RSP back to where our
-     * RET address is.  This means it's reverse order from what happens in
-     * the prologue.
-     *
-     * Note! Using a frame register approach here both because we have one
-     *       and mainly because the UWOP_ALLOC_LARGE argument values
-     *       would be a pain to write initializers for.  On the positive
-     *       side, we're impervious to changes in the stack variable
-     *       area and can deal with dynamic stack allocations if necessary.
-     */
-    static const IMAGE_UNWIND_CODE s_aOpcodes[] =
-    {
-        { { 16, IMAGE_AMD64_UWOP_SET_FPREG,     0 } },              /* RSP  = RBP - FrameOffset * 10 (0x60) */
-        { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL,   0 } },              /* RSP += 8; */
-        { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x15 } },   /* R15  = [RSP]; RSP += 8; */
-        { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x14 } },   /* R14  = [RSP]; RSP += 8; */
-        { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x13 } },   /* R13  = [RSP]; RSP += 8; */
-        { {  8, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x12 } },   /* R12  = [RSP]; RSP += 8; */
-        { {  7, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xDI } },   /* RDI  = [RSP]; RSP += 8; */
-        { {  6, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xSI } },   /* RSI  = [RSP]; RSP += 8; */
-        { {  5, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xBX } },   /* RBX  = [RSP]; RSP += 8; */
-        { {  4, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xBP } },   /* RBP  = [RSP]; RSP += 8; */
-    };
-    union
-    {
-        IMAGE_UNWIND_INFO Info;
-        uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
-    } s_UnwindInfo =
-    {
-        {
-            /* .Version = */        1,
-            /* .Flags = */          0,
-            /* .SizeOfProlog = */   16, /* whatever */
-            /* .CountOfCodes = */   RT_ELEMENTS(s_aOpcodes),
-            /* .FrameRegister = */  X86_GREG_xBP,
-            /* .FrameOffset = */    (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
-        }
-    };
-    AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
-    AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
-
-    /*
-     * Calc how much space we need and allocate it off the exec heap.
-     */
-    unsigned const cFunctionEntries = 1;
-    unsigned const cbUnwindInfo     = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
-    unsigned const cbNeeded         = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
-#  ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
-    unsigned const cbNeededAligned  = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
-    PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
-        = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
-#  else
-    unsigned const cbNeededAligned  = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
-                                    - pExecMemAllocator->cbHeapBlockHdr;
-    PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
-                                                                                                       32 /*cbAlignment*/);
-#  endif
-    AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
-    pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
-
-    /*
-     * Initialize the structures.
-     */
-    PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
-
-    paFunctions[0].BeginAddress         = 0;
-    paFunctions[0].EndAddress           = pExecMemAllocator->cbChunk;
-    paFunctions[0].UnwindInfoAddress    = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
-
-    memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
-    memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
-
-    /*
-     * Register it.
-     */
-    uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
-    AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
-
-    return VINF_SUCCESS;
-}
-
-
-# else /* !RT_OS_WINDOWS */
-
-/**
- * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
- */
-DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
-{
-    if (iValue >= 64)
-    {
-        Assert(iValue < 0x2000);
-        *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
-        *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
-    }
-    else if (iValue >= 0)
-        *Ptr.pb++ = (uint8_t)iValue;
-    else if (iValue > -64)
-        *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
-    else
-    {
-        Assert(iValue > -0x2000);
-        *Ptr.pb++ = ((uint8_t)iValue & 0x7f)        | 0x80;
-        *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
-    }
-    return Ptr;
-}
-
-
-/**
- * Emits an ULEB128 encoded value (up to 64-bit wide).
- */
-DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
-{
-    while (uValue >= 0x80)
-    {
-        *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
-        uValue  >>= 7;
-    }
-    *Ptr.pb++ = (uint8_t)uValue;
-    return Ptr;
-}
-
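
A worked example for the two encoders above (hypothetical test snippet, not in the changeset): the CIE below uses data alignment factor -8, which iemDwarfPutLeb128 emits as the single byte 0x78, while a value such as 300 becomes the two ULEB128 bytes 0xac 0x02.

    uint8_t abBuf[8];
    RTPTRUNION Ptr;
    Ptr.pb = abBuf;
    Ptr = iemDwarfPutLeb128(Ptr, -8);     /* abBuf[0] == 0x78 ((-8 & 0x3f) | 0x40 sign marker) */
    Ptr = iemDwarfPutUleb128(Ptr, 300);   /* abBuf[1] == 0xac (low 7 bits + continuation), abBuf[2] == 0x02 */
    Assert(abBuf[0] == 0x78 && abBuf[1] == 0xac && abBuf[2] == 0x02);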
-
-/**
- * Emits a CFA rule as register @a uReg + offset @a off.
- */
-DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
-{
-    *Ptr.pb++ = DW_CFA_def_cfa;
-    Ptr = iemDwarfPutUleb128(Ptr, uReg);
-    Ptr = iemDwarfPutUleb128(Ptr, off);
-    return Ptr;
-}
-
-
-/**
- * Emits a register (@a uReg) save location:
- *      CFA + @a off * data_alignment_factor
- */
-DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
-{
-    if (uReg < 0x40)
-        *Ptr.pb++ = DW_CFA_offset | uReg;
-    else
-    {
-        *Ptr.pb++ = DW_CFA_offset_extended;
-        Ptr = iemDwarfPutUleb128(Ptr, uReg);
-    }
-    Ptr = iemDwarfPutUleb128(Ptr, off);
-    return Ptr;
-}
-
-
-#  if 0 /* unused */
-/**
- * Emits a register (@a uReg) save location, using signed offset:
- *      CFA + @a offSigned * data_alignment_factor
- */
-DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
-{
-    *Ptr.pb++ = DW_CFA_offset_extended_sf;
-    Ptr = iemDwarfPutUleb128(Ptr, uReg);
-    Ptr = iemDwarfPutLeb128(Ptr, offSigned);
-    return Ptr;
-}
-#  endif
-
-
-/**
- * Initializes the unwind info section for non-windows hosts.
- */
-static int
-iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
-                                                     void *pvChunk, uint32_t idxChunk)
-{
-    PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
-    pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
-
-    RTPTRUNION Ptr = { pEhFrame->abEhFrame };
-
-    /*
-     * Generate the CIE first.
-     */
-#  ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
-    uint8_t const iDwarfVer = 3;
-#  else
-    uint8_t const iDwarfVer = 4;
-#  endif
-    RTPTRUNION const PtrCie = Ptr;
-    *Ptr.pu32++ = 123;                                      /* The CIE length will be determined later. */
-    *Ptr.pu32++ = 0 /*UINT32_MAX*/;                         /* I'm a CIE in .eh_frame speak. */
-    *Ptr.pb++   = iDwarfVer;                                /* DWARF version */
-    *Ptr.pb++   = 0;                                        /* Augmentation. */
-    if (iDwarfVer >= 4)
-    {
-        *Ptr.pb++   = sizeof(uintptr_t);                    /* Address size. */
-        *Ptr.pb++   = 0;                                    /* Segment selector size. */
-    }
-#  ifdef RT_ARCH_AMD64
-    Ptr = iemDwarfPutLeb128(Ptr, 1);                        /* Code alignment factor (LEB128 = 1). */
-#  else
-    Ptr = iemDwarfPutLeb128(Ptr, 4);                        /* Code alignment factor (LEB128 = 4). */
-#  endif
-    Ptr = iemDwarfPutLeb128(Ptr, -8);                       /* Data alignment factor (LEB128 = -8). */
-#  ifdef RT_ARCH_AMD64
-    Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA);          /* Return address column (ULEB128) */
-#  elif defined(RT_ARCH_ARM64)
-    Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR);          /* Return address column (ULEB128) */
-#  else
-#   error "port me"
-#  endif
-    /* Initial instructions: */
-#  ifdef RT_ARCH_AMD64
-    Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16);   /* CFA     = RBP + 0x10 - first stack parameter */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,  1);    /* Ret RIP = [CFA + 1*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2);    /* RBP     = [CFA + 2*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3);    /* RBX     = [CFA + 3*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4);    /* R12     = [CFA + 4*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5);    /* R13     = [CFA + 5*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6);    /* R14     = [CFA + 6*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7);    /* R15     = [CFA + 7*-8] */
-#  elif defined(RT_ARCH_ARM64)
-#   if 1
-    Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP,  16);   /* CFA     = BP + 0x10 - first stack parameter */
-#   else
-    Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP,  IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
-#   endif
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR,   1);   /* Ret PC  = [CFA + 1*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP,   2);   /* Ret BP  = [CFA + 2*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28,  3);   /* X28     = [CFA + 3*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27,  4);   /* X27     = [CFA + 4*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26,  5);   /* X26     = [CFA + 5*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25,  6);   /* X25     = [CFA + 6*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24,  7);   /* X24     = [CFA + 7*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23,  8);   /* X23     = [CFA + 8*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22,  9);   /* X22     = [CFA + 9*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10);   /* X21     = [CFA +10*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11);   /* X20     = [CFA +11*-8] */
-    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12);   /* X19     = [CFA +12*-8] */
-    AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
-    /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
-#  else
-#   error "port me"
-#  endif
-    while ((Ptr.u - PtrCie.u) & 3)
-        *Ptr.pb++ = DW_CFA_nop;
-    /* Finalize the CIE size. */
-    *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
-
-    /*
-     * Generate an FDE for the whole chunk area.
-     */
-#  ifdef IEMNATIVE_USE_LIBUNWIND
-    pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
-#  endif
-    RTPTRUNION const PtrFde = Ptr;
-    *Ptr.pu32++ = 123;                                      /* The FDE length will be determined later. */
-    *Ptr.pu32   = Ptr.u - PtrCie.u;                         /* Negated self relative CIE address. */
-    Ptr.pu32++;
-    *Ptr.pu64++ = (uintptr_t)pvChunk;                       /* Absolute start PC of this FDE. */
-    *Ptr.pu64++ = pExecMemAllocator->cbChunk;               /* PC range length for this FDE. */
-#  if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
-    *Ptr.pb++ = DW_CFA_nop;
-#  endif
-    while ((Ptr.u - PtrFde.u) & 3)
-        *Ptr.pb++ = DW_CFA_nop;
-    /* Finalize the FDE size. */
-    *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
-
-    /* Terminator entry. */
-    *Ptr.pu32++ = 0;
-    *Ptr.pu32++ = 0;            /* just to be sure... */
-    Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
-
-    /*
-     * Register it.
-     */
-#  ifdef IEMNATIVE_USE_LIBUNWIND
-    __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
-#  else
-    memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
-    __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
-#  endif
-
-#  ifdef IEMNATIVE_USE_GDB_JIT
-    /*
-     * Now for telling GDB about this (experimental).
-     *
-     * This seems to work best with ET_DYN.
-     */
-    unsigned const cbNeeded        = sizeof(GDBJITSYMFILE);
-#   ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
-    unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
-    GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
-#   else
-    unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
-                                   - pExecMemAllocator->cbHeapBlockHdr;
-    GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
-#   endif
-    AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
-    unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
-
-    RT_ZERO(*pSymFile);
-
-    /*
-     * The ELF header:
-     */
-    pSymFile->EHdr.e_ident[0]           = ELFMAG0;
-    pSymFile->EHdr.e_ident[1]           = ELFMAG1;
-    pSymFile->EHdr.e_ident[2]           = ELFMAG2;
-    pSymFile->EHdr.e_ident[3]           = ELFMAG3;
-    pSymFile->EHdr.e_ident[EI_VERSION]  = EV_CURRENT;
-    pSymFile->EHdr.e_ident[EI_CLASS]    = ELFCLASS64;
-    pSymFile->EHdr.e_ident[EI_DATA]     = ELFDATA2LSB;
-    pSymFile->EHdr.e_ident[EI_OSABI]    = ELFOSABI_NONE;
-#   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
-    pSymFile->EHdr.e_type               = ET_DYN;
-#   else
-    pSymFile->EHdr.e_type               = ET_REL;
-#   endif
-#   ifdef RT_ARCH_AMD64
-    pSymFile->EHdr.e_machine            = EM_AMD64;
-#   elif defined(RT_ARCH_ARM64)
-    pSymFile->EHdr.e_machine            = EM_AARCH64;
-#   else
-#    error "port me"
-#   endif
-    pSymFile->EHdr.e_version            = 1; /*?*/
-    pSymFile->EHdr.e_entry              = 0;
-#   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
-    pSymFile->EHdr.e_phoff              = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
-#   else
-    pSymFile->EHdr.e_phoff              = 0;
-#   endif
-    pSymFile->EHdr.e_shoff              = sizeof(pSymFile->EHdr);
-    pSymFile->EHdr.e_flags              = 0;
-    pSymFile->EHdr.e_ehsize             = sizeof(pSymFile->EHdr);
-#   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
-    pSymFile->EHdr.e_phentsize          = sizeof(pSymFile->aPhdrs[0]);
-    pSymFile->EHdr.e_phnum              = RT_ELEMENTS(pSymFile->aPhdrs);
-#   else
-    pSymFile->EHdr.e_phentsize          = 0;
-    pSymFile->EHdr.e_phnum              = 0;
-#   endif
-    pSymFile->EHdr.e_shentsize          = sizeof(pSymFile->aShdrs[0]);
-    pSymFile->EHdr.e_shnum              = RT_ELEMENTS(pSymFile->aShdrs);
-    pSymFile->EHdr.e_shstrndx           = 0; /* set later */
-
    1049     uint32_t offStrTab = 0;
    1050 #define APPEND_STR(a_szStr) do { \
    1051         memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
    1052         offStrTab += sizeof(a_szStr); \
    1053         Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
    1054     } while (0)
    1055 #define APPEND_STR_FMT(a_szStr, ...) do { \
    1056         offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
    1057         offStrTab++; \
    1058         Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
    1059     } while (0)
    1060 
    1061     /*
    1062      * Section headers.
    1063      */
    1064     /* Section header #0: NULL */
    1065     unsigned i = 0;
    1066     APPEND_STR("");
    1067     RT_ZERO(pSymFile->aShdrs[i]);
    1068     i++;
    1069 
    1070     /* Section header: .eh_frame */
    1071     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1072     APPEND_STR(".eh_frame");
    1073     pSymFile->aShdrs[i].sh_type         = SHT_PROGBITS;
    1074     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC | SHF_EXECINSTR;
    1075 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    1076     pSymFile->aShdrs[i].sh_offset
    1077         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
    1078 #   else
    1079     pSymFile->aShdrs[i].sh_addr         = (uintptr_t)&pSymFile->abEhFrame[0];
    1080     pSymFile->aShdrs[i].sh_offset       = 0;
    1081 #   endif
    1082 
    1083     pSymFile->aShdrs[i].sh_size         = sizeof(pEhFrame->abEhFrame);
    1084     pSymFile->aShdrs[i].sh_link         = 0;
    1085     pSymFile->aShdrs[i].sh_info         = 0;
    1086     pSymFile->aShdrs[i].sh_addralign    = 1;
    1087     pSymFile->aShdrs[i].sh_entsize      = 0;
    1088     memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
    1089     i++;
    1090 
    1091     /* Section header: .shstrtab */
    1092     unsigned const iShStrTab = i;
    1093     pSymFile->EHdr.e_shstrndx           = iShStrTab;
    1094     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1095     APPEND_STR(".shstrtab");
    1096     pSymFile->aShdrs[i].sh_type         = SHT_STRTAB;
    1097     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1098 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    1099     pSymFile->aShdrs[i].sh_offset
    1100         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
    1101 #   else
    1102     pSymFile->aShdrs[i].sh_addr         = (uintptr_t)&pSymFile->szzStrTab[0];
    1103     pSymFile->aShdrs[i].sh_offset       = 0;
    1104 #   endif
    1105     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->szzStrTab);
    1106     pSymFile->aShdrs[i].sh_link         = 0;
    1107     pSymFile->aShdrs[i].sh_info         = 0;
    1108     pSymFile->aShdrs[i].sh_addralign    = 1;
    1109     pSymFile->aShdrs[i].sh_entsize      = 0;
    1110     i++;
    1111 
    1112     /* Section header: .symbols */
    1113     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1114     APPEND_STR(".symtab");
    1115     pSymFile->aShdrs[i].sh_type         = SHT_SYMTAB;
    1116     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1117     pSymFile->aShdrs[i].sh_offset
    1118         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
    1119     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aSymbols);
    1120     pSymFile->aShdrs[i].sh_link         = iShStrTab;
    1121     pSymFile->aShdrs[i].sh_info         = RT_ELEMENTS(pSymFile->aSymbols);
    1122     pSymFile->aShdrs[i].sh_addralign    = sizeof(pSymFile->aSymbols[0].st_value);
    1123     pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aSymbols[0]);
    1124     i++;
    1125 
    1126 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
     1127     /* Section header: .dynsym */
    1128     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1129     APPEND_STR(".dynsym");
    1130     pSymFile->aShdrs[i].sh_type         = SHT_DYNSYM;
    1131     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1132     pSymFile->aShdrs[i].sh_offset
    1133         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
    1134     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aDynSyms);
    1135     pSymFile->aShdrs[i].sh_link         = iShStrTab;
    1136     pSymFile->aShdrs[i].sh_info         = RT_ELEMENTS(pSymFile->aDynSyms);
    1137     pSymFile->aShdrs[i].sh_addralign    = sizeof(pSymFile->aDynSyms[0].st_value);
    1138     pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aDynSyms[0]);
    1139     i++;
    1140 #   endif
    1141 
    1142 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1143     /* Section header: .dynamic */
    1144     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1145     APPEND_STR(".dynamic");
    1146     pSymFile->aShdrs[i].sh_type         = SHT_DYNAMIC;
    1147     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1148     pSymFile->aShdrs[i].sh_offset
    1149         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
    1150     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aDyn);
    1151     pSymFile->aShdrs[i].sh_link         = iShStrTab;
    1152     pSymFile->aShdrs[i].sh_info         = 0;
    1153     pSymFile->aShdrs[i].sh_addralign    = 1;
    1154     pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aDyn[0]);
    1155     i++;
    1156 #   endif
    1157 
    1158     /* Section header: .text */
    1159     unsigned const iShText = i;
    1160     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1161     APPEND_STR(".text");
    1162     pSymFile->aShdrs[i].sh_type         = SHT_PROGBITS;
    1163     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC | SHF_EXECINSTR;
    1164 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    1165     pSymFile->aShdrs[i].sh_offset
    1166         = pSymFile->aShdrs[i].sh_addr   = sizeof(GDBJITSYMFILE);
    1167 #   else
    1168     pSymFile->aShdrs[i].sh_addr         = (uintptr_t)(pSymFile + 1);
    1169     pSymFile->aShdrs[i].sh_offset       = 0;
    1170 #   endif
    1171     pSymFile->aShdrs[i].sh_size         = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
    1172     pSymFile->aShdrs[i].sh_link         = 0;
    1173     pSymFile->aShdrs[i].sh_info         = 0;
    1174     pSymFile->aShdrs[i].sh_addralign    = 1;
    1175     pSymFile->aShdrs[i].sh_entsize      = 0;
    1176     i++;
    1177 
    1178     Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
    1179 
    1180 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1181     /*
    1182      * The program headers:
    1183      */
    1184     /* Everything in a single LOAD segment: */
    1185     i = 0;
    1186     pSymFile->aPhdrs[i].p_type          = PT_LOAD;
    1187     pSymFile->aPhdrs[i].p_flags         = PF_X | PF_R;
    1188     pSymFile->aPhdrs[i].p_offset
    1189         = pSymFile->aPhdrs[i].p_vaddr
    1190         = pSymFile->aPhdrs[i].p_paddr   = 0;
    1191     pSymFile->aPhdrs[i].p_filesz         /* Size of segment in file. */
    1192         = pSymFile->aPhdrs[i].p_memsz   = pExecMemAllocator->cbChunk - offSymFileInChunk;
    1193     pSymFile->aPhdrs[i].p_align         = HOST_PAGE_SIZE;
    1194     i++;
    1195     /* The .dynamic segment. */
    1196     pSymFile->aPhdrs[i].p_type          = PT_DYNAMIC;
    1197     pSymFile->aPhdrs[i].p_flags         = PF_R;
    1198     pSymFile->aPhdrs[i].p_offset
    1199         = pSymFile->aPhdrs[i].p_vaddr
    1200         = pSymFile->aPhdrs[i].p_paddr   = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
    1201     pSymFile->aPhdrs[i].p_filesz         /* Size of segment in file. */
    1202         = pSymFile->aPhdrs[i].p_memsz   = sizeof(pSymFile->aDyn);
    1203     pSymFile->aPhdrs[i].p_align         = sizeof(pSymFile->aDyn[0].d_tag);
    1204     i++;
    1205 
    1206     Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
    1207 
    1208     /*
    1209      * The dynamic section:
    1210      */
    1211     i = 0;
    1212     pSymFile->aDyn[i].d_tag             = DT_SONAME;
    1213     pSymFile->aDyn[i].d_un.d_val        = offStrTab;
    1214     APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
    1215     i++;
    1216     pSymFile->aDyn[i].d_tag             = DT_STRTAB;
    1217     pSymFile->aDyn[i].d_un.d_ptr        = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
    1218     i++;
    1219     pSymFile->aDyn[i].d_tag             = DT_STRSZ;
    1220     pSymFile->aDyn[i].d_un.d_val        = sizeof(pSymFile->szzStrTab);
    1221     i++;
    1222     pSymFile->aDyn[i].d_tag             = DT_SYMTAB;
    1223     pSymFile->aDyn[i].d_un.d_ptr        = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
    1224     i++;
    1225     pSymFile->aDyn[i].d_tag             = DT_SYMENT;
    1226     pSymFile->aDyn[i].d_un.d_val        = sizeof(pSymFile->aDynSyms[0]);
    1227     i++;
    1228     pSymFile->aDyn[i].d_tag             = DT_NULL;
    1229     i++;
    1230     Assert(i == RT_ELEMENTS(pSymFile->aDyn));
    1231 #   endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
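    /* (Standard ELF dynamic-section semantics: DT_STRTAB/DT_SYMTAB/DT_SYMENT
       let a consumer locate the dynamic symbol table without parsing section
       headers, while DT_SONAME gives this pseudo shared object its per-chunk
       name.) */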
    1232 
    1233     /*
    1234      * Symbol tables:
    1235      */
    1236     /** @todo gdb doesn't seem to really like this ...   */
    1237     i = 0;
    1238     pSymFile->aSymbols[i].st_name       = 0;
    1239     pSymFile->aSymbols[i].st_shndx      = SHN_UNDEF;
    1240     pSymFile->aSymbols[i].st_value      = 0;
    1241     pSymFile->aSymbols[i].st_size       = 0;
    1242     pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
    1243     pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
    1244 #   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    1245     pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
    1246 #   endif
    1247     i++;
    1248 
    1249     pSymFile->aSymbols[i].st_name       = 0;
    1250     pSymFile->aSymbols[i].st_shndx      = SHN_ABS;
    1251     pSymFile->aSymbols[i].st_value      = 0;
    1252     pSymFile->aSymbols[i].st_size       = 0;
    1253     pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
    1254     pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
    1255     i++;
    1256 
    1257     pSymFile->aSymbols[i].st_name       = offStrTab;
    1258     APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
    1259 #   if 0
    1260     pSymFile->aSymbols[i].st_shndx      = iShText;
    1261     pSymFile->aSymbols[i].st_value      = 0;
    1262 #   else
    1263     pSymFile->aSymbols[i].st_shndx      = SHN_ABS;
    1264     pSymFile->aSymbols[i].st_value      = (uintptr_t)(pSymFile + 1);
    1265 #   endif
    1266     pSymFile->aSymbols[i].st_size       = pSymFile->aShdrs[iShText].sh_size;
    1267     pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
    1268     pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
    1269 #   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    1270     pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
    1271     pSymFile->aDynSyms[1].st_value      = (uintptr_t)(pSymFile + 1);
    1272 #   endif
    1273     i++;
    1274 
    1275     Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
    1276     Assert(offStrTab < sizeof(pSymFile->szzStrTab));
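    /* (Net effect: a single global STT_FUNC symbol, iem_exec_chunk_<cpu>_<chunk>,
       spanning the executable remainder of the chunk, so GDB can attribute
       JIT'ed frames to the right chunk even without per-TB symbols.) */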
    1277 
    1278     /*
    1279      * The GDB JIT entry and informing GDB.
    1280      */
    1281     pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
    1282 #   if 1
    1283     pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
    1284 #   else
    1285     pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
    1286 #   endif
    1287 
    1288     RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
    1289     RTCritSectEnter(&g_IemNativeGdbJitLock);
    1290     pEhFrame->GdbJitEntry.pNext      = NULL;
    1291     pEhFrame->GdbJitEntry.pPrev      = __jit_debug_descriptor.pTail;
    1292     if (__jit_debug_descriptor.pTail)
    1293         __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
    1294     else
    1295         __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
    1296     __jit_debug_descriptor.pTail     = &pEhFrame->GdbJitEntry;
    1297     __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
    1298 
    1299     /* Notify GDB: */
    1300     __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
    1301     __jit_debug_register_code();
    1302     __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
    1303     RTCritSectLeave(&g_IemNativeGdbJitLock);
    1304 
    1305 #  else  /* !IEMNATIVE_USE_GDB_JIT */
    1306     RT_NOREF(pVCpu);
    1307 #  endif /* !IEMNATIVE_USE_GDB_JIT */
    1308 
    1309     return VINF_SUCCESS;
    1310 }
    1311 
    1312 # endif /* !RT_OS_WINDOWS */
    1313 #endif /* IN_RING3 */
    1314 
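For reference, a minimal sketch of the canonical GDB JIT interface that the registration code above implements, with the declarations as documented in the GDB manual; VBox uses its own equivalently laid out types (the pHead/pTail/enmAction naming seen above):

    #include <stddef.h>
    #include <stdint.h>

    typedef enum
    {
        JIT_NOACTION = 0,
        JIT_REGISTER_FN,
        JIT_UNREGISTER_FN
    } jit_actions_t;

    struct jit_code_entry
    {
        struct jit_code_entry *next_entry;
        struct jit_code_entry *prev_entry;
        const char            *symfile_addr;    /* in-memory ELF image (pSymFile above) */
        uint64_t               symfile_size;
    };

    struct jit_descriptor
    {
        uint32_t               version;         /* always 1 */
        uint32_t               action_flag;     /* a jit_actions_t value */
        struct jit_code_entry *relevant_entry;  /* entry the action applies to */
        struct jit_code_entry *first_entry;     /* head of the doubly linked list */
    };

    /* GDB plants a breakpoint in this function; it must not be optimized away. */
    void __jit_debug_register_code(void);
    struct jit_descriptor __jit_debug_descriptor = { 1, JIT_NOACTION, NULL, NULL };

The debugger re-reads __jit_debug_descriptor whenever the __jit_debug_register_code breakpoint is hit and loads or unloads the in-memory ELF image the relevant entry points at, which is what the critsect-guarded list manipulation above drives.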
    1315 
    1316 /**
    1317  * Adds another chunk to the executable memory allocator.
    1318  *
    1319  * This is used by the init code for the initial allocation and later by the
    1320  * regular allocator function when it's out of memory.
    1321  */
    1322 static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
    1323 {
     1324     /* Check that we have room for growth. */
    1325     uint32_t const idxChunk = pExecMemAllocator->cChunks;
    1326     AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
    1327 
    1328     /* Allocate a chunk. */
    1329 #ifdef RT_OS_DARWIN
    1330     void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
    1331 #else
    1332     void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
    1333 #endif
    1334     AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
    1335 
    1336 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1337     int rc = VINF_SUCCESS;
    1338 #else
    1339     /* Initialize the heap for the chunk. */
    1340     RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
    1341     int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
    1342     AssertRC(rc);
    1343     if (RT_SUCCESS(rc))
    1344     {
    1345         /*
     1346          * We want the memory to be 64 byte aligned, so the first time through
     1347          * here we do some exploratory allocations to see how we can achieve this.
    1348          * On subsequent runs we only make an initial adjustment allocation, if
    1349          * necessary.
    1350          *
    1351          * Since we own the heap implementation, we know that the internal block
    1352          * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
     1353          * so all we need to do wrt allocation size adjustments is to add 32 bytes
    1354          * to the size, align up by 64 bytes, and subtract 32 bytes.
    1355          *
    1356          * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
     1357          * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
    1358          * allocation to force subsequent allocations to return 64 byte aligned
    1359          * user areas.
    1360          */
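        /* Worked example of that size adjustment (sketch): for a 256 byte
           request we allocate RT_ALIGN_32(256 + 32, 64) - 32 = 288 bytes, so
           header (32) plus user area (288) spans exactly five 64 byte lines
           and consecutive blocks keep the same alignment phase, yielding
           64 byte aligned user areas. */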
    1361         if (!pExecMemAllocator->cbHeapBlockHdr)
    1362         {
    1363             pExecMemAllocator->cbHeapBlockHdr   = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
    1364             pExecMemAllocator->cbHeapAlignTweak = 64;
    1365             pExecMemAllocator->pvAlignTweak     = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
    1366                                                                     32 /*cbAlignment*/);
    1367             AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
    1368 
    1369             void *pvTest1 = RTHeapSimpleAlloc(hHeap,
    1370                                                 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
    1371                                               - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
    1372             AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
    1373             AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
    1374 
    1375             void *pvTest2 = RTHeapSimpleAlloc(hHeap,
    1376                                                 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
    1377                                               - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
    1378             AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
    1379             AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
    1380 
    1381             RTHeapSimpleFree(hHeap, pvTest2);
    1382             RTHeapSimpleFree(hHeap, pvTest1);
    1383         }
    1384         else
    1385         {
    1386             pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap,  pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
    1387             AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
    1388         }
    1389         if (RT_SUCCESS(rc))
    1390 #endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
    1391         {
    1392             /*
    1393              * Add the chunk.
    1394              *
     1395              * This must be done before the unwind init so Windows can allocate
    1396              * memory from the chunk when using the alternative sub-allocator.
    1397              */
    1398             pExecMemAllocator->aChunks[idxChunk].pvChunk      = pvChunk;
    1399 #ifdef IN_RING3
    1400             pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
    1401 #endif
    1402 #ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1403             pExecMemAllocator->aChunks[idxChunk].hHeap        = hHeap;
    1404 #else
    1405             pExecMemAllocator->aChunks[idxChunk].cFreeUnits   = pExecMemAllocator->cUnitsPerChunk;
    1406             pExecMemAllocator->aChunks[idxChunk].idxFreeHint  = 0;
    1407             memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
    1408                    0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
    1409 #endif
    1410 
    1411             pExecMemAllocator->cChunks      = idxChunk + 1;
    1412             pExecMemAllocator->idxChunkHint = idxChunk;
    1413 
    1414 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1415             pExecMemAllocator->cbTotal     += pExecMemAllocator->cbChunk;
    1416             pExecMemAllocator->cbFree      += pExecMemAllocator->cbChunk;
    1417 #else
    1418             size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
    1419             pExecMemAllocator->cbTotal     += cbFree;
    1420             pExecMemAllocator->cbFree      += cbFree;
    1421 #endif
    1422 
    1423 #ifdef IN_RING3
    1424             /*
    1425              * Initialize the unwind information (this cannot really fail atm).
    1426              * (This sets pvUnwindInfo.)
    1427              */
    1428             rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
    1429             if (RT_SUCCESS(rc))
    1430 #endif
    1431             {
    1432                 return VINF_SUCCESS;
    1433             }
    1434 
    1435 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     1436             /* Just in case the impossible happens, undo the above: */
    1437             pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
    1438             pExecMemAllocator->cbFree  -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    1439             pExecMemAllocator->cChunks  = idxChunk;
    1440             memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
    1441                    0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
    1442             pExecMemAllocator->aChunks[idxChunk].pvChunk    = NULL;
    1443             pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
    1444 #endif
    1445         }
    1446 #ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1447     }
    1448 #endif
    1449     RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
    1450     RT_NOREF(pVCpu);
    1451     return rc;
    1452 }
    1453 
    1454 
    1455 /**
    1456  * Initializes the executable memory allocator for native recompilation on the
    1457  * calling EMT.
    1458  *
    1459  * @returns VBox status code.
    1460  * @param   pVCpu       The cross context virtual CPU structure of the calling
    1461  *                      thread.
    1462  * @param   cbMax       The max size of the allocator.
    1463  * @param   cbInitial   The initial allocator size.
    1464  * @param   cbChunk     The chunk size, 0 or UINT32_MAX for default (@a cbMax
    1465  *                      dependent).
    1466  */
    1467 int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
    1468 {
    1469     /*
    1470      * Validate input.
    1471      */
    1472     AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
    1473     AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
     1474     AssertLogRelMsgReturn(   cbChunk == UINT32_MAX
    1475                           || cbChunk == 0
    1476                           || (   RT_IS_POWER_OF_TWO(cbChunk)
    1477                               && cbChunk >= _1M
    1478                               && cbChunk <= _256M
    1479                               && cbChunk <= cbMax),
    1480                           ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
    1481                           VERR_OUT_OF_RANGE);
    1482 
    1483     /*
    1484      * Adjust/figure out the chunk size.
    1485      */
    1486     if (cbChunk == 0 || cbChunk == UINT32_MAX)
    1487     {
    1488         if (cbMax >= _256M)
    1489             cbChunk = _64M;
    1490         else
    1491         {
    1492             if (cbMax < _16M)
    1493                 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
    1494             else
    1495                 cbChunk = (uint32_t)cbMax / 4;
    1496             if (!RT_IS_POWER_OF_TWO(cbChunk))
    1497                 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
    1498         }
    1499     }
    1500 
    1501     if (cbChunk > cbMax)
    1502         cbMax = cbChunk;
    1503     else
    1504         cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
    1505     uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
    1506     AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
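    /* Worked example (sketch): cbMax = 40 MB with cbChunk left at the default
       gives cbChunk = 40 MB / 4 = 10 MB, rounded up to the next power of two
       (RT_BIT_32(ASMBitLastSetU32(10 MB)) = 16 MB); cbMax is then rounded up
       to 48 MB and cMaxChunks = 3. */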
    1507 
    1508     /*
     1509      * Allocate and initialize the allocator instance.
    1510      */
    1511     size_t       cbNeeded   = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
    1512 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1513     size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
    1514     size_t const cbBitmap   = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
    1515     cbNeeded += cbBitmap * cMaxChunks;
    1516     AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
    1517     Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
    1518 #endif
    1519 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    1520     size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
    1521     cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
    1522 #endif
    1523     PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
    1524     AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
    1525                           VERR_NO_MEMORY);
    1526     pExecMemAllocator->uMagic       = IEMEXECMEMALLOCATOR_MAGIC;
    1527     pExecMemAllocator->cbChunk      = cbChunk;
    1528     pExecMemAllocator->cMaxChunks   = cMaxChunks;
    1529     pExecMemAllocator->cChunks      = 0;
    1530     pExecMemAllocator->idxChunkHint = 0;
    1531     pExecMemAllocator->cAllocations = 0;
    1532     pExecMemAllocator->cbTotal      = 0;
    1533     pExecMemAllocator->cbFree       = 0;
    1534     pExecMemAllocator->cbAllocated  = 0;
    1535 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1536     pExecMemAllocator->pbmAlloc                 = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
    1537     pExecMemAllocator->cUnitsPerChunk           = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    1538     pExecMemAllocator->cBitmapElementsPerChunk  = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
    1539     memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
    1540 #endif
    1541 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    1542     pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
    1543 #endif
    1544     for (uint32_t i = 0; i < cMaxChunks; i++)
    1545     {
    1546 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1547         pExecMemAllocator->aChunks[i].cFreeUnits   = 0;
    1548         pExecMemAllocator->aChunks[i].idxFreeHint  = 0;
    1549 #else
    1550         pExecMemAllocator->aChunks[i].hHeap        = NIL_RTHEAPSIMPLE;
    1551 #endif
    1552         pExecMemAllocator->aChunks[i].pvChunk      = NULL;
    1553 #ifdef IN_RING0
    1554         pExecMemAllocator->aChunks[i].hMemObj      = NIL_RTR0MEMOBJ;
    1555 #else
    1556         pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
    1557 #endif
    1558     }
    1559     pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
    1560 
    1561     /*
    1562      * Do the initial allocations.
    1563      */
     1564     while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
    1565     {
    1566         int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
    1567         AssertLogRelRCReturn(rc, rc);
    1568     }
    1569 
    1570     pExecMemAllocator->idxChunkHint = 0;
    1571 
    1572     return VINF_SUCCESS;
    1573 }
    1574 
    1575 
    1576 /*********************************************************************************************************************************
    1577 *   Native Recompilation                                                                                                         *
    1578 *********************************************************************************************************************************/
    1579 
    1580 
    1581 /**
    1582  * Used by TB code when encountering a non-zero status or rcPassUp after a call.
    1583  */
    1584 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
    1585 {
    1586     pVCpu->iem.s.cInstructions += idxInstr;
    1587     return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
    1588 }
    1589 
    1590 
    1591 /**
    1592  * Used by TB code when it wants to raise a \#GP(0).
    1593  */
    1594 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
    1595 {
    1596     iemRaiseGeneralProtectionFault0Jmp(pVCpu);
    1597 #ifndef _MSC_VER
    1598     return VINF_IEM_RAISED_XCPT; /* not reached */
    1599 #endif
    1600 }
    1601 
    1602 
    1603 /**
    1604  * Used by TB code when it wants to raise a \#NM.
    1605  */
    1606 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
    1607 {
    1608     iemRaiseDeviceNotAvailableJmp(pVCpu);
    1609 #ifndef _MSC_VER
    1610     return VINF_IEM_RAISED_XCPT; /* not reached */
    1611 #endif
    1612 }
    1613 
    1614 
    1615 /**
    1616  * Used by TB code when it wants to raise a \#UD.
    1617  */
    1618 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
    1619 {
    1620     iemRaiseUndefinedOpcodeJmp(pVCpu);
    1621 #ifndef _MSC_VER
    1622     return VINF_IEM_RAISED_XCPT; /* not reached */
    1623 #endif
    1624 }
    1625 
    1626 
    1627 /**
    1628  * Used by TB code when it wants to raise a \#MF.
    1629  */
    1630 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
    1631 {
    1632     iemRaiseMathFaultJmp(pVCpu);
    1633 #ifndef _MSC_VER
    1634     return VINF_IEM_RAISED_XCPT; /* not reached */
    1635 #endif
    1636 }
    1637 
    1638 
    1639 /**
    1640  * Used by TB code when it wants to raise a \#XF.
    1641  */
    1642 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
    1643 {
    1644     iemRaiseSimdFpExceptionJmp(pVCpu);
    1645 #ifndef _MSC_VER
    1646     return VINF_IEM_RAISED_XCPT; /* not reached */
    1647 #endif
    1648 }
    1649 
    1650 
    1651 /**
    1652  * Used by TB code when detecting opcode changes.
    1653  * @see iemThreadeFuncWorkerObsoleteTb
    1654  */
    1655 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
    1656 {
     1657     /* We set fSafeToFree to false because we're being called in the context
     1658        of a TB callback function, which for native TBs means we cannot release
     1659        the executable memory until we've returned our way back to iemTbExec, as
     1660        that return path goes via the native code generated for the TB. */
    1661     Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
    1662     iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
    1663     return VINF_IEM_REEXEC_BREAK;
    1664 }
    1665 
    1666 
    1667 /**
    1668  * Used by TB code when we need to switch to a TB with CS.LIM checking.
    1669  */
    1670 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
    1671 {
    1672     Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
    1673           pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
    1674           (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
    1675           pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
    1676     STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
    1677     return VINF_IEM_REEXEC_BREAK;
    1678 }
    1679 
    1680 
    1681 /**
    1682  * Used by TB code when we missed a PC check after a branch.
    1683  */
    1684 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
    1685 {
    1686     Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
    1687           pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
    1688           pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
    1689           pVCpu->iem.s.pbInstrBuf));
    1690     STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
    1691     return VINF_IEM_REEXEC_BREAK;
    1692 }
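/* (The GCPhysWithOffset value logged above is the guest-physical address of
   the current instruction: the buffer's guest-physical base plus the flat PC
   (cs.u64Base + rip) minus the flat address the buffer is mapped at
   (uInstrBufPc).) */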
    1693 
    1694 
    1695 
    1696 /*********************************************************************************************************************************
    1697 *   Helpers: Segmented memory fetches and stores.                                                                                *
    1698 *********************************************************************************************************************************/
    1699 
    1700 /**
    1701  * Used by TB code to load unsigned 8-bit data w/ segmentation.
    1702  */
    1703 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1704 {
    1705 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1706     return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1707 #else
    1708     return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1709 #endif
    1710 }
    1711 
    1712 
    1713 /**
    1714  * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
    1715  * to 16 bits.
    1716  */
    1717 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1718 {
    1719 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1720     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1721 #else
    1722     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1723 #endif
    1724 }
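/* Worked example of the cast chain above (sketch): a fetched byte 0x80 is
   (int8_t)-128, sign-extends to (int16_t)0xFF80, converts to (uint16_t)0xFF80
   and zero-extends to the uint64_t 0x000000000000FF80 - i.e. sign-extend to
   the target width, then zero-extend to 64 bits for the host register. */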
    1725 
    1726 
    1727 /**
    1728  * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
    1729  * to 32 bits.
    1730  */
    1731 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1732 {
    1733 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1734     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1735 #else
    1736     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1737 #endif
    1738 }
    1739 
    1740 /**
    1741  * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
    1742  * to 64 bits.
    1743  */
    1744 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1745 {
    1746 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1747     return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1748 #else
    1749     return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1750 #endif
    1751 }
    1752 
    1753 
    1754 /**
    1755  * Used by TB code to load unsigned 16-bit data w/ segmentation.
    1756  */
    1757 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1758 {
    1759 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1760     return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1761 #else
    1762     return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
    1763 #endif
    1764 }
    1765 
    1766 
    1767 /**
    1768  * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
    1769  * to 32 bits.
    1770  */
    1771 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1772 {
    1773 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1774     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1775 #else
    1776     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
    1777 #endif
    1778 }
    1779 
    1780 
    1781 /**
    1782  * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
    1783  * to 64 bits.
    1784  */
    1785 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1786 {
    1787 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1788     return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1789 #else
    1790     return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
    1791 #endif
    1792 }
    1793 
    1794 
    1795 /**
    1796  * Used by TB code to load unsigned 32-bit data w/ segmentation.
    1797  */
    1798 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1799 {
    1800 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1801     return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1802 #else
    1803     return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
    1804 #endif
    1805 }
    1806 
    1807 
    1808 /**
    1809  * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
    1810  * to 64 bits.
    1811  */
    1812 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1813 {
    1814 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1815     return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1816 #else
    1817     return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
    1818 #endif
    1819 }
    1820 
    1821 
    1822 /**
    1823  * Used by TB code to load unsigned 64-bit data w/ segmentation.
    1824  */
    1825 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1826 {
    1827 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1828     return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1829 #else
    1830     return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
    1831 #endif
    1832 }
    1833 
    1834 
    1835 /**
    1836  * Used by TB code to store unsigned 8-bit data w/ segmentation.
    1837  */
    1838 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
    1839 {
    1840 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    1841     iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
    1842 #else
    1843     iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
    1844 #endif
    1845 }
    1846 
    1847 
    1848 /**
    1849  * Used by TB code to store unsigned 16-bit data w/ segmentation.
    1850  */
    1851 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
    1852 {
    1853 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    1854     iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
    1855 #else
    1856     iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
    1857 #endif
    1858 }
    1859 
    1860 
    1861 /**
    1862  * Used by TB code to store unsigned 32-bit data w/ segmentation.
    1863  */
    1864 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
    1865 {
    1866 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    1867     iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
    1868 #else
    1869     iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
    1870 #endif
    1871 }
    1872 
    1873 
    1874 /**
    1875  * Used by TB code to store unsigned 64-bit data w/ segmentation.
    1876  */
    1877 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
    1878 {
    1879 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    1880     iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
    1881 #else
    1882     iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
    1883 #endif
    1884 }
    1885 
    1886 
    1887 
    1888 /**
    1889  * Used by TB code to store an unsigned 16-bit value onto a generic stack.
    1890  */
    1891 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
    1892 {
    1893 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    1894     iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
    1895 #else
    1896     iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
    1897 #endif
    1898 }
    1899 
    1900 
    1901 /**
    1902  * Used by TB code to store an unsigned 32-bit value onto a generic stack.
    1903  */
    1904 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    1905 {
    1906 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    1907     iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
    1908 #else
    1909     iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
    1910 #endif
    1911 }
    1912 
    1913 
    1914 /**
     1915  * Used by TB code to store a 32-bit selector value onto a generic stack.
     1916  *
     1917  * Intel CPUs don't write a whole dword, hence the special function.
    1918  */
    1919 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    1920 {
    1921 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    1922     iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
    1923 #else
    1924     iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
    1925 #endif
    1926 }
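/* (Per the Intel SDM: with a 32-bit operand size, recent Intel CPUs push a
   segment selector as a 16-bit write, leaving the upper half of the dword
   stack slot unmodified - hence these dedicated SReg store helpers.) */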
    1927 
    1928 
    1929 /**
     1930  * Used by TB code to store an unsigned 64-bit value onto a generic stack.
    1931  */
    1932 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
    1933 {
    1934 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    1935     iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
    1936 #else
    1937     iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
    1938 #endif
    1939 }
    1940 
    1941 
    1942 /**
    1943  * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
    1944  */
    1945 IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1946 {
    1947 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    1948     return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
    1949 #else
    1950     return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
    1951 #endif
    1952 }
    1953 
    1954 
    1955 /**
    1956  * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
    1957  */
    1958 IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1959 {
    1960 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    1961     return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
    1962 #else
    1963     return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
    1964 #endif
    1965 }
    1966 
    1967 
    1968 /**
    1969  * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
    1970  */
    1971 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1972 {
    1973 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    1974     return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
    1975 #else
    1976     return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
    1977 #endif
    1978 }
    1979 
    1980 
    1981 
    1982 /*********************************************************************************************************************************
    1983 *   Helpers: Flat memory fetches and stores.                                                                                     *
    1984 *********************************************************************************************************************************/
    1985 
    1986 /**
    1987  * Used by TB code to load unsigned 8-bit data w/ flat address.
    1988  * @note Zero extending the value to 64-bit to simplify assembly.
    1989  */
    1990 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1991 {
    1992 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1993     return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    1994 #else
    1995     return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    1996 #endif
    1997 }
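/* (Pattern for all the flat helpers in this section: with the TLB-lookup
   path compiled in, the segmented Safe worker is reused with iSegReg set to
   UINT8_MAX, the IEM convention for flat addressing; otherwise a dedicated
   Flat worker is called.) */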
    1998 
    1999 
    2000 /**
    2001  * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
    2002  * to 16 bits.
    2003  * @note Zero extending the value to 64-bit to simplify assembly.
    2004  */
    2005 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2006 {
    2007 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2008     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2009 #else
    2010     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    2011 #endif
    2012 }
    2013 
    2014 
    2015 /**
    2016  * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
    2017  * to 32 bits.
    2018  * @note Zero extending the value to 64-bit to simplify assembly.
    2019  */
    2020 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2021 {
    2022 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2023     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2024 #else
    2025     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    2026 #endif
    2027 }
    2028 
    2029 
    2030 /**
    2031  * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
    2032  * to 64 bits.
    2033  */
    2034 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2035 {
    2036 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2037     return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2038 #else
    2039     return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    2040 #endif
    2041 }
    2042 
    2043 
    2044 /**
    2045  * Used by TB code to load unsigned 16-bit data w/ flat address.
    2046  * @note Zero extending the value to 64-bit to simplify assembly.
    2047  */
    2048 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2049 {
    2050 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2051     return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2052 #else
    2053     return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
    2054 #endif
    2055 }
    2056 
    2057 
    2058 /**
    2059  * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
    2060  * to 32 bits.
    2061  * @note Zero extending the value to 64-bit to simplify assembly.
    2062  */
    2063 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2064 {
    2065 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2066     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2067 #else
    2068     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
    2069 #endif
    2070 }
    2071 
    2072 
    2073 /**
    2074  * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
    2075  * to 64 bits.
    2076  * @note Zero extending the value to 64-bit to simplify assembly.
    2077  */
    2078 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2079 {
    2080 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2081     return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2082 #else
    2083     return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
    2084 #endif
    2085 }
    2086 
    2087 
    2088 /**
    2089  * Used by TB code to load unsigned 32-bit data w/ flat address.
    2090  * @note Zero extending the value to 64-bit to simplify assembly.
    2091  */
    2092 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2093 {
    2094 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2095     return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2096 #else
    2097     return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
    2098 #endif
    2099 }
    2100 
    2101 
    2102 /**
    2103  * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
    2104  * to 64 bits.
    2105  * @note Zero extending the value to 64-bit to simplify assembly.
    2106  */
    2107 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2108 {
    2109 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2110     return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2111 #else
    2112     return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
    2113 #endif
    2114 }
    2115 
    2116 
    2117 /**
    2118  * Used by TB code to load unsigned 64-bit data w/ flat address.
    2119  */
    2120 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2121 {
    2122 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2123     return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2124 #else
    2125     return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
    2126 #endif
    2127 }
    2128 
    2129 
    2130 /**
    2131  * Used by TB code to store unsigned 8-bit data w/ flat address.
    2132  */
    2133 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
    2134 {
    2135 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2136     iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
    2137 #else
    2138     iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
    2139 #endif
    2140 }
    2141 
    2142 
    2143 /**
    2144  * Used by TB code to store unsigned 16-bit data w/ flat address.
    2145  */
    2146 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
    2147 {
    2148 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2149     iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
    2150 #else
    2151     iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
    2152 #endif
    2153 }
    2154 
    2155 
    2156 /**
    2157  * Used by TB code to store unsigned 32-bit data w/ flat address.
    2158  */
    2159 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    2160 {
    2161 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2162     iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
    2163 #else
    2164     iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
    2165 #endif
    2166 }
    2167 
    2168 
    2169 /**
    2170  * Used by TB code to store unsigned 64-bit data w/ flat address.
    2171  */
    2172 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
    2173 {
    2174 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2175     iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
    2176 #else
    2177     iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
    2178 #endif
    2179 }
    2180 
    2181 
    2182 
    2183 /**
    2184  * Used by TB code to store an unsigned 16-bit value onto a flat stack.
    2185  */
    2186 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
    2187 {
    2188 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2189     iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
    2190 #else
    2191     iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
    2192 #endif
    2193 }
    2194 
    2195 
    2196 /**
    2197  * Used by TB code to store an unsigned 32-bit value onto a flat stack.
    2198  */
    2199 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    2200 {
    2201 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2202     iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
    2203 #else
    2204     iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
    2205 #endif
    2206 }
    2207 
    2208 
    2209 /**
    2210  * Used by TB code to store a segment selector value onto a flat stack.
    2211  *
     2212  * Intel CPUs don't write a whole dword, hence the special function.
    2213  */
    2214 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    2215 {
    2216 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2217     iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
    2218 #else
    2219     iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
    2220 #endif
    2221 }
    2222 
    2223 
    2224 /**
    2225  * Used by TB code to store an unsigned 64-bit value onto a flat stack.
    2226  */
    2227 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
    2228 {
    2229 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2230     iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
    2231 #else
    2232     iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
    2233 #endif
    2234 }
    2235 
    2236 
    2237 /**
     2238  * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
    2239  */
    2240 IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2241 {
    2242 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    2243     return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
    2244 #else
    2245     return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
    2246 #endif
    2247 }
    2248 
    2249 
    2250 /**
     2251  * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
    2252  */
    2253 IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2254 {
    2255 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    2256     return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
    2257 #else
    2258     return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
    2259 #endif
    2260 }
    2261 
    2262 
    2263 /**
     2264  * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
    2265  */
    2266 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2267 {
    2268 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    2269     return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
    2270 #else
    2271     return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
    2272 #endif
    2273 }
    2274 
    2275 
    2276 
    2277 /*********************************************************************************************************************************
    2278 *   Helpers: Segmented memory mapping.                                                                                           *
    2279 *********************************************************************************************************************************/
    2280 
    2281 /**
    2282  * Used by TB code to map unsigned 8-bit data for atomic read-write w/
    2283  * segmentation.
    2284  */
    2285 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2286                                                                    RTGCPTR GCPtrMem, uint8_t iSegReg))
    2287 {
    2288 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2289     return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2290 #else
    2291     return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2292 #endif
    2293 }
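To show how these mapping helpers are meant to be consumed, a minimal usage sketch; the commit/unmap counterpart, assumed here to be iemNativeHlpMemCommitAndUnmapRw taking the bUnmapInfo cookie, is defined further down in the file:

    /* Sketch: read-modify-write of one guest byte through the map helpers. */
    uint8_t  bUnmapInfo;
    uint8_t *pbGuest = iemNativeHlpMemMapDataU8Rw(pVCpu, &bUnmapInfo, GCPtrMem, X86_SREG_DS);
    *pbGuest |= 0x80;                                    /* modify the guest byte in place */
    iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);  /* commit the write and release the mapping */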
    2294 
    2295 
    2296 /**
    2297  * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
    2298  */
    2299 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2300                                                                RTGCPTR GCPtrMem, uint8_t iSegReg))
    2301 {
    2302 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2303     return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2304 #else
    2305     return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2306 #endif
    2307 }
    2308 
    2309 
    2310 /**
    2311  * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
    2312  */
    2313 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2314                                                                RTGCPTR GCPtrMem, uint8_t iSegReg))
    2315 {
    2316 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2317     return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2318 #else
    2319     return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2320 #endif
    2321 }
    2322 
    2323 
    2324 /**
    2325  * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
    2326  */
    2327 IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2328                                                                      RTGCPTR GCPtrMem, uint8_t iSegReg))
    2329 {
    2330 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2331     return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2332 #else
    2333     return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2334 #endif
    2335 }
    2336 
    2337 
    2338 /**
    2339  * Used by TB code to map unsigned 16-bit data for atomic read-write w/
    2340  * segmentation.
    2341  */
    2342 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2343                                                                      RTGCPTR GCPtrMem, uint8_t iSegReg))
    2344 {
    2345 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2346     return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2347 #else
    2348     return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2349 #endif
    2350 }
    2351 
    2352 
    2353 /**
    2354  * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
    2355  */
    2356 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2357                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2358 {
    2359 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2360     return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2361 #else
    2362     return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2363 #endif
    2364 }
    2365 
    2366 
    2367 /**
    2368  * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
    2369  */
    2370 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2371                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2372 {
    2373 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2374     return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2375 #else
    2376     return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2377 #endif
    2378 }
    2379 
    2380 
    2381 /**
    2382  * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
    2383  */
    2384 IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2385                                                                        RTGCPTR GCPtrMem, uint8_t iSegReg))
    2386 {
    2387 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2388     return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2389 #else
    2390     return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2391 #endif
    2392 }
    2393 
    2394 
    2395 /**
    2396  * Used by TB code to map unsigned 32-bit data for atomic read-write w/
    2397  * segmentation.
    2398  */
    2399 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2400                                                                      RTGCPTR GCPtrMem, uint8_t iSegReg))
    2401 {
    2402 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2403     return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2404 #else
    2405     return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2406 #endif
    2407 }
    2408 
    2409 
    2410 /**
    2411  * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
    2412  */
    2413 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2414                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2415 {
    2416 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2417     return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2418 #else
    2419     return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2420 #endif
    2421 }
    2422 
    2423 
    2424 /**
    2425  * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
    2426  */
    2427 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2428                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2429 {
    2430 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2431     return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2432 #else
    2433     return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2434 #endif
    2435 }
    2436 
    2437 
    2438 /**
    2439  * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
    2440  */
    2441 IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2442                                                                        RTGCPTR GCPtrMem, uint8_t iSegReg))
    2443 {
    2444 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2445     return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2446 #else
    2447     return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2448 #endif
    2449 }
    2450 
    2451 
    2452 /**
    2453  * Used by TB code to map unsigned 64-bit data for atomic read-write w/
    2454  * segmentation.
    2455  */
    2456 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2457                                                                      RTGCPTR GCPtrMem, uint8_t iSegReg))
    2458 {
    2459 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2460     return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2461 #else
    2462     return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2463 #endif
    2464 }
    2465 
    2466 
    2467 /**
    2468  * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
    2469  */
    2470 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2471                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2472 {
    2473 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2474     return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2475 #else
    2476     return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2477 #endif
    2478 }
    2479 
    2480 
    2481 /**
    2482  * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
    2483  */
    2484 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2485                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2486 {
    2487 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2488     return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2489 #else
    2490     return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2491 #endif
    2492 }
    2493 
    2494 
    2495 /**
    2496  * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
    2497  */
    2498 IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2499                                                                        RTGCPTR GCPtrMem, uint8_t iSegReg))
    2500 {
    2501 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2502     return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2503 #else
    2504     return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2505 #endif
    2506 }
    2507 
    2508 
    2509 /**
    2510  * Used by TB code to map 80-bit float data writeonly w/ segmentation.
    2511  */
    2512 IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2513                                                                    RTGCPTR GCPtrMem, uint8_t iSegReg))
    2514 {
    2515 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2516     return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2517 #else
    2518     return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2519 #endif
    2520 }
    2521 
    2522 
    2523 /**
    2524  * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
    2525  */
    2526 IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2527                                                                   RTGCPTR GCPtrMem, uint8_t iSegReg))
    2528 {
    2529 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2530     return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2531 #else
    2532     return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2533 #endif
    2534 }
    2535 
    2536 
    2537 /**
    2538  * Used by TB code to map unsigned 128-bit data for atomic read-write w/
    2539  * segmentation.
    2540  */
    2541 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2542                                                                         RTGCPTR GCPtrMem, uint8_t iSegReg))
    2543 {
    2544 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2545     return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2546 #else
    2547     return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2548 #endif
    2549 }
    2550 
    2551 
    2552 /**
    2553  * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
    2554  */
    2555 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2556                                                                     RTGCPTR GCPtrMem, uint8_t iSegReg))
    2557 {
    2558 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2559     return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2560 #else
    2561     return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2562 #endif
    2563 }
    2564 
    2565 
    2566 /**
    2567  * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
    2568  */
    2569 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2570                                                                     RTGCPTR GCPtrMem, uint8_t iSegReg))
    2571 {
    2572 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2573     return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2574 #else
    2575     return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2576 #endif
    2577 }
    2578 
    2579 
    2580 /**
    2581  * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
    2582  */
    2583 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2584                                                                           RTGCPTR GCPtrMem, uint8_t iSegReg))
    2585 {
    2586 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2587     return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2588 #else
    2589     return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2590 #endif
    2591 }
    2592 
    2593 
    2594 /*********************************************************************************************************************************
    2595 *   Helpers: Flat memory mapping.                                                                                                *
    2596 *********************************************************************************************************************************/
    2597 
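         /*
          * Note: With IEMNATIVE_WITH_TLB_LOOKUP_MAPPED defined, the flat helpers below
          * reuse the segmented 'safe' workers, passing UINT8_MAX as the segment
          * register index to indicate a flat address.
          */
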
    2598 /**
    2599  * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
    2600  * address.
    2601  */
    2602 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2603 {
    2604 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2605     return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2606 #else
    2607     return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2608 #endif
    2609 }
    2610 
    2611 
    2612 /**
    2613  * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
    2614  */
    2615 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2616 {
    2617 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2618     return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2619 #else
    2620     return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2621 #endif
    2622 }
    2623 
    2624 
    2625 /**
    2626  * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
    2627  */
    2628 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2629 {
    2630 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2631     return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2632 #else
    2633     return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2634 #endif
    2635 }
    2636 
    2637 
    2638 /**
    2639  * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
    2640  */
    2641 IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2642 {
    2643 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2644     return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2645 #else
    2646     return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2647 #endif
    2648 }
    2649 
    2650 
    2651 /**
    2652  * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
    2653  * address.
    2654  */
    2655 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2656 {
    2657 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2658     return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2659 #else
    2660     return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2661 #endif
    2662 }
    2663 
    2664 
    2665 /**
    2666  * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
    2667  */
    2668 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2669 {
    2670 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2671     return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2672 #else
    2673     return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2674 #endif
    2675 }
    2676 
    2677 
    2678 /**
    2679  * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
    2680  */
    2681 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2682 {
    2683 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2684     return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2685 #else
    2686     return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2687 #endif
    2688 }
    2689 
    2690 
    2691 /**
    2692  * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
    2693  */
    2694 IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2695 {
    2696 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2697     return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2698 #else
    2699     return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2700 #endif
    2701 }
    2702 
    2703 
    2704 /**
    2705  * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
    2706  * address.
    2707  */
    2708 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2709 {
    2710 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2711     return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2712 #else
    2713     return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2714 #endif
    2715 }
    2716 
    2717 
    2718 /**
    2719  * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
    2720  */
    2721 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2722 {
    2723 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2724     return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2725 #else
    2726     return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2727 #endif
    2728 }
    2729 
    2730 
    2731 /**
    2732  * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
    2733  */
    2734 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2735 {
    2736 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2737     return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2738 #else
    2739     return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2740 #endif
    2741 }
    2742 
    2743 
    2744 /**
    2745  * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
    2746  */
    2747 IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2748 {
    2749 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2750     return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2751 #else
    2752     return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2753 #endif
    2754 }
    2755 
    2756 
    2757 /**
    2758  * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
    2759  * address.
    2760  */
    2761 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2762 {
    2763 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2764     return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2765 #else
    2766     return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2767 #endif
    2768 }
    2769 
    2770 
    2771 /**
    2772  * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
    2773  */
    2774 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2775 {
    2776 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2777     return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2778 #else
    2779     return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2780 #endif
    2781 }
    2782 
    2783 
    2784 /**
    2785  * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
    2786  */
    2787 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2788 {
    2789 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2790     return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2791 #else
    2792     return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2793 #endif
    2794 }
    2795 
    2796 
    2797 /**
    2798  * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
    2799  */
    2800 IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2801 {
    2802 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2803     return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2804 #else
    2805     return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2806 #endif
    2807 }
    2808 
    2809 
    2810 /**
    2811  * Used by TB code to map 80-bit float data writeonly w/ flat address.
    2812  */
    2813 IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2814 {
    2815 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2816     return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2817 #else
    2818     return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2819 #endif
    2820 }
    2821 
    2822 
    2823 /**
    2824  * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
    2825  */
    2826 IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2827 {
    2828 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2829     return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2830 #else
    2831     return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2832 #endif
    2833 }
    2834 
    2835 
    2836 /**
    2837  * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
    2838  * address.
    2839  */
    2840 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2841 {
    2842 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2843     return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2844 #else
    2845     return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2846 #endif
    2847 }
    2848 
    2849 
    2850 /**
    2851  * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
    2852  */
    2853 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2854 {
    2855 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2856     return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2857 #else
    2858     return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2859 #endif
    2860 }
    2861 
    2862 
    2863 /**
    2864  * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
    2865  */
    2866 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2867 {
    2868 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2869     return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2870 #else
    2871     return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2872 #endif
    2873 }
    2874 
    2875 
    2876 /**
    2877  * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
    2878  */
    2879 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2880 {
    2881 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2882     return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2883 #else
    2884     return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2885 #endif
    2886 }
    2887 
    2888 
    2889 /*********************************************************************************************************************************
    2890 *   Helpers: Commit, rollback & unmap                                                                                            *
    2891 *********************************************************************************************************************************/
    2892 
    2893 /**
    2894  * Used by TB code to commit and unmap an atomic read-write memory mapping.
    2895  */
    2896 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    2897 {
    2898     return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
    2899 }
    2900 
    2901 
    2902 /**
    2903  * Used by TB code to commit and unmap a read-write memory mapping.
    2904  */
    2905 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    2906 {
    2907     return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
    2908 }
    2909 
    2910 
    2911 /**
    2912  * Used by TB code to commit and unmap a write-only memory mapping.
    2913  */
    2914 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    2915 {
    2916     return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
    2917 }
    2918 
    2919 
    2920 /**
    2921  * Used by TB code to commit and unmap a read-only memory mapping.
    2922  */
    2923 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    2924 {
    2925     return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
    2926 }
    2927 
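         /*
          * Illustrative pairing of the mapping and unmap helpers above (a hypothetical
          * sketch of the call sequence a TB effectively performs, not verbatim
          * recompiler output) for a read-modify-write access:
          *
          *     uint8_t   bUnmapInfo;
          *     uint16_t *pu16 = iemNativeHlpMemFlatMapDataU16Rw(pVCpu, &bUnmapInfo, GCPtrMem);
          *     *pu16 += 1;                                         // modify the guest data
          *     iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo); // commit and unmap
          */
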
    2928 
    2929 /**
    2930  * Reinitializes the native recompiler state.
    2931  *
    2932  * Called before starting a new recompile job.
    2933  */
    2934 static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
    2935 {
    2936     pReNative->cLabels                     = 0;
    2937     pReNative->bmLabelTypes                = 0;
    2938     pReNative->cFixups                     = 0;
    2939 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2940     pReNative->pDbgInfo->cEntries          = 0;
    2941 #endif
    2942     pReNative->pTbOrg                      = pTb;
    2943     pReNative->cCondDepth                  = 0;
    2944     pReNative->uCondSeqNo                  = 0;
    2945     pReNative->uCheckIrqSeqNo              = 0;
    2946     pReNative->uTlbSeqNo                   = 0;
    2947 
    2948 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    2949     pReNative->Core.offPc                  = 0;
    2950     pReNative->Core.cInstrPcUpdateSkipped  = 0;
    2951 #endif
    2952     pReNative->Core.bmHstRegs              = IEMNATIVE_REG_FIXED_MASK
    2953 #if IEMNATIVE_HST_GREG_COUNT < 32
    2954                                            | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
    2955 #endif
    2956                                            ;
    2957     pReNative->Core.bmHstRegsWithGstShadow = 0;
    2958     pReNative->Core.bmGstRegShadows        = 0;
    2959     pReNative->Core.bmVars                 = 0;
    2960     pReNative->Core.bmStack                = 0;
    2961     AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
    2962     pReNative->Core.u64ArgVars             = UINT64_MAX;
    2963 
    2964     AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 13);
    2965     pReNative->aidxUniqueLabels[0]         = UINT32_MAX;
    2966     pReNative->aidxUniqueLabels[1]         = UINT32_MAX;
    2967     pReNative->aidxUniqueLabels[2]         = UINT32_MAX;
    2968     pReNative->aidxUniqueLabels[3]         = UINT32_MAX;
    2969     pReNative->aidxUniqueLabels[4]         = UINT32_MAX;
    2970     pReNative->aidxUniqueLabels[5]         = UINT32_MAX;
    2971     pReNative->aidxUniqueLabels[6]         = UINT32_MAX;
    2972     pReNative->aidxUniqueLabels[7]         = UINT32_MAX;
    2973     pReNative->aidxUniqueLabels[8]         = UINT32_MAX;
    2974     pReNative->aidxUniqueLabels[9]         = UINT32_MAX;
    2975     pReNative->aidxUniqueLabels[10]        = UINT32_MAX;
    2976     pReNative->aidxUniqueLabels[11]        = UINT32_MAX;
    2977     pReNative->aidxUniqueLabels[12]        = UINT32_MAX;
    2978 
    2979     /* Full host register reinit: */
    2980     for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
    2981     {
    2982         pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
    2983         pReNative->Core.aHstRegs[i].enmWhat        = kIemNativeWhat_Invalid;
    2984         pReNative->Core.aHstRegs[i].idxVar         = UINT8_MAX;
    2985     }
    2986 
    2987     uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
    2988                    & ~(  RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
    2989 #ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
    2990                        | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
    2991 #endif
    2992 #ifdef IEMNATIVE_REG_FIXED_TMP0
    2993                        | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
    2994 #endif
    2995 #ifdef IEMNATIVE_REG_FIXED_TMP1
    2996                        | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
    2997 #endif
    2998 #ifdef IEMNATIVE_REG_FIXED_PC_DBG
    2999                        | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
    3000 #endif
    3001                       );
    3002     for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
    3003     {
    3004         fRegs &= ~RT_BIT_32(idxReg);
    3005         pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
    3006     }
    3007 
    3008     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat     = kIemNativeWhat_pVCpuFixed;
    3009 #ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
    3010     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat   = kIemNativeWhat_pCtxFixed;
    3011 #endif
    3012 #ifdef IEMNATIVE_REG_FIXED_TMP0
    3013     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat       = kIemNativeWhat_FixedTmp;
    3014 #endif
    3015 #ifdef IEMNATIVE_REG_FIXED_TMP1
    3016     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat       = kIemNativeWhat_FixedTmp;
    3017 #endif
    3018 #ifdef IEMNATIVE_REG_FIXED_PC_DBG
    3019     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat     = kIemNativeWhat_PcShadow;
    3020 #endif
    3021 
    3022 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    3023 # ifdef RT_ARCH_ARM64
    3024     /*
    3025      * ARM64 only has 32 128-bit registers; in order to support emulating 256-bit registers we
    3026      * statically pair two real registers into one virtual register for now, leaving only 16 256-bit registers.
    3027      * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register as fixed here during init
    3028      * and the register allocator assumes that it will always be free when the lower one is picked.
    3029      */
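         /* 0xaaaaaaaa has every odd bit set, i.e. v1, v3, ..., v31 get reserved as the
            high halves of the 256-bit pairs described above. */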
    3030     uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
    3031 # else
    3032     uint32_t const fFixedAdditional = 0;
    3033 # endif
    3034 
    3035     pReNative->Core.bmHstSimdRegs          = IEMNATIVE_SIMD_REG_FIXED_MASK
    3036                                            | fFixedAdditional
    3037 # if IEMNATIVE_HST_SIMD_REG_COUNT < 32
    3038                                            | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
    3039 # endif
    3040                                            ;
    3041     pReNative->Core.bmHstSimdRegsWithGstShadow   = 0;
    3042     pReNative->Core.bmGstSimdRegShadows          = 0;
    3043     pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
    3044     pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
    3045 
    3046     /* Full host SIMD register reinit: */
    3047     for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
    3048     {
    3049         pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
    3050         pReNative->Core.aHstSimdRegs[i].enmWhat        = kIemNativeWhat_Invalid;
    3051         pReNative->Core.aHstSimdRegs[i].enmLoaded      = kIemNativeGstSimdRegLdStSz_Invalid;
    3052     }
    3053 
    3054     fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
    3055     for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
    3056     {
    3057         fRegs &= ~RT_BIT_32(idxReg);
    3058         pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
    3059     }
    3060 
    3061 # ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
    3062     pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
    3063 # endif
    3064 
    3065 #endif
    3066 
    3067     return pReNative;
    3068 }
    3069 
    3070 
    3071 /**
    3072  * Allocates and initializes the native recompiler state.
    3073  *
    3074  * This is called the first time an EMT wants to recompile something.
    3075  *
    3076  * @returns Pointer to the new recompiler state.
    3077  * @param   pVCpu   The cross context virtual CPU structure of the calling
    3078  *                  thread.
    3079  * @param   pTb     The TB that's about to be recompiled.
    3080  * @thread  EMT(pVCpu)
    3081  */
    3082 static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
    3083 {
    3084     VMCPU_ASSERT_EMT(pVCpu);
    3085 
    3086     PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
    3087     AssertReturn(pReNative, NULL);
    3088 
    3089     /*
    3090      * Try to allocate all the buffers and stuff we need.
    3091      */
    3092     pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
    3093     pReNative->paLabels  = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
    3094     pReNative->paFixups  = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
    3095 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3096     pReNative->pDbgInfo  = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
    3097 #endif
    3098     if (RT_LIKELY(   pReNative->pInstrBuf
    3099                   && pReNative->paLabels
    3100                   && pReNative->paFixups)
    3101 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3102         && pReNative->pDbgInfo
    3103 #endif
    3104        )
    3105     {
    3106         /*
    3107          * Set the buffer & array sizes on success.
    3108          */
    3109         pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
    3110         pReNative->cLabelsAlloc   = _8K;
    3111         pReNative->cFixupsAlloc   = _16K;
    3112 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3113         pReNative->cDbgInfoAlloc  = _16K;
    3114 #endif
    3115 
    3116         /* Other constant stuff: */
    3117         pReNative->pVCpu          = pVCpu;
    3118 
    3119         /*
    3120          * Done, just need to save it and reinit it.
    3121          */
    3122         pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
    3123         return iemNativeReInit(pReNative, pTb);
    3124     }
    3125 
    3126     /*
    3127      * Failed. Cleanup and return.
    3128      */
    3129     AssertFailed();
    3130     RTMemFree(pReNative->pInstrBuf);
    3131     RTMemFree(pReNative->paLabels);
    3132     RTMemFree(pReNative->paFixups);
    3133 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3134     RTMemFree(pReNative->pDbgInfo);
    3135 #endif
    3136     RTMemFree(pReNative);
    3137     return NULL;
    3138 }
    3139 
    3140 
    3141 /**
    3142  * Creates a label.
    3143  *
    3144  * If the label does not yet have a defined position,
    3145  * call iemNativeLabelDefine() later to set it.
    3146  *
    3147  * @returns Label ID. Throws VBox status code on failure, so no need to check
    3148  *          the return value.
    3149  * @param   pReNative   The native recompile state.
    3150  * @param   enmType     The label type.
    3151  * @param   offWhere    The instruction offset of the label.  UINT32_MAX if the
    3152  *                      label is not yet defined (default).
    3153  * @param   uData       Data associated with the label. Only applicable to
    3154  *                      certain types of labels. Default is zero.
    3155  */
    3156 DECL_HIDDEN_THROW(uint32_t)
    3157 iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
    3158                      uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
    3159 {
    3160     Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
    3161 
    3162     /*
    3163      * Locate existing label definition.
    3164      *
    3165      * This is only allowed for forward declarations where offWhere=UINT32_MAX
    3166      * and uData is zero.
    3167      */
    3168     PIEMNATIVELABEL paLabels = pReNative->paLabels;
    3169     uint32_t const  cLabels  = pReNative->cLabels;
    3170     if (   pReNative->bmLabelTypes & RT_BIT_64(enmType)
    3171 #ifndef VBOX_STRICT
    3172         && enmType  <  kIemNativeLabelType_FirstWithMultipleInstances
    3173         && offWhere == UINT32_MAX
    3174         && uData    == 0
    3175 #endif
    3176         )
    3177     {
    3178 #ifndef VBOX_STRICT
    3179         AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
    3180                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    3181         uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
    3182         if (idxLabel < pReNative->cLabels)
    3183             return idxLabel;
    3184 #else
    3185         for (uint32_t i = 0; i < cLabels; i++)
    3186             if (   paLabels[i].enmType == enmType
    3187                 && paLabels[i].uData   == uData)
    3188             {
    3189                 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    3190                 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    3191                 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
    3192                 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
    3193                            IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    3194                 return i;
    3195             }
    3196         AssertStmt(   enmType >= kIemNativeLabelType_FirstWithMultipleInstances
    3197                    || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    3198 #endif
    3199     }
    3200 
    3201     /*
    3202      * Make sure we've got room for another label.
    3203      */
    3204     if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
    3205     { /* likely */ }
    3206     else
    3207     {
    3208         uint32_t cNew = pReNative->cLabelsAlloc;
    3209         AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
    3210         AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
    3211         cNew *= 2;
    3212         AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
    3213         paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
    3214         AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
    3215         pReNative->paLabels     = paLabels;
    3216         pReNative->cLabelsAlloc = cNew;
    3217     }
    3218 
    3219     /*
    3220      * Define a new label.
    3221      */
    3222     paLabels[cLabels].off     = offWhere;
    3223     paLabels[cLabels].enmType = enmType;
    3224     paLabels[cLabels].uData   = uData;
    3225     pReNative->cLabels = cLabels + 1;
    3226 
    3227     Assert((unsigned)enmType < 64);
    3228     pReNative->bmLabelTypes |= RT_BIT_64(enmType);
    3229 
    3230     if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
    3231     {
    3232         Assert(uData == 0);
    3233         pReNative->aidxUniqueLabels[enmType] = cLabels;
    3234     }
    3235 
    3236     if (offWhere != UINT32_MAX)
    3237     {
    3238 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3239         iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
    3240         iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
    3241 #endif
    3242     }
    3243     return cLabels;
    3244 }
    3245 
    3246 
    3247 /**
    3248  * Defines the location of an existing label.
    3249  *
    3250  * @param   pReNative   The native recompile state.
    3251  * @param   idxLabel    The label to define.
    3252  * @param   offWhere    The position.
    3253  */
    3254 DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
    3255 {
    3256     AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
    3257     PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
    3258     AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
    3259     pLabel->off = offWhere;
    3260 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3261     iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
    3262     iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
    3263 #endif
    3264 }
    3265 
    3266 
    3267 /**
    3268  * Looks up a label.
    3269  *
    3270  * @returns Label ID if found, UINT32_MAX if not.
    3271  */
    3272 static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
    3273                                    uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
    3274 {
    3275     Assert((unsigned)enmType < 64);
    3276     if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
    3277     {
    3278         if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
    3279             return pReNative->aidxUniqueLabels[enmType];
    3280 
    3281         PIEMNATIVELABEL paLabels = pReNative->paLabels;
    3282         uint32_t const  cLabels  = pReNative->cLabels;
    3283         for (uint32_t i = 0; i < cLabels; i++)
    3284             if (   paLabels[i].enmType == enmType
    3285                 && paLabels[i].uData   == uData
    3286                 && (   paLabels[i].off == offWhere
    3287                     || offWhere        == UINT32_MAX
    3288                     || paLabels[i].off == UINT32_MAX))
    3289                 return i;
    3290     }
    3291     return UINT32_MAX;
    3292 }
    3293 
    3294 
    3295 /**
    3296  * Adds a fixup.
    3297  *
    3298  * @throws  VBox status code (int) on failure.
    3299  * @param   pReNative   The native recompile state.
    3300  * @param   offWhere    The instruction offset of the fixup location.
    3301  * @param   idxLabel    The target label ID for the fixup.
    3302  * @param   enmType     The fixup type.
    3303  * @param   offAddend   Fixup addend if applicable to the type. Default is 0.
    3304  */
    3305 DECL_HIDDEN_THROW(void)
    3306 iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
    3307                   IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
    3308 {
    3309     Assert(idxLabel <= UINT16_MAX);
    3310     Assert((unsigned)enmType <= UINT8_MAX);
    3311 
    3312     /*
    3313      * Make sure we've got room.
    3314      */
    3315     PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
    3316     uint32_t const  cFixups  = pReNative->cFixups;
    3317     if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
    3318     { /* likely */ }
    3319     else
    3320     {
    3321         uint32_t cNew = pReNative->cFixupsAlloc;
    3322         AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
    3323         AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
    3324         cNew *= 2;
    3325         AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
    3326         paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
    3327         AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
    3328         pReNative->paFixups     = paFixups;
    3329         pReNative->cFixupsAlloc = cNew;
    3330     }
    3331 
    3332     /*
    3333      * Add the fixup.
    3334      */
    3335     paFixups[cFixups].off       = offWhere;
    3336     paFixups[cFixups].idxLabel  = (uint16_t)idxLabel;
    3337     paFixups[cFixups].enmType   = enmType;
    3338     paFixups[cFixups].offAddend = offAddend;
    3339     pReNative->cFixups = cFixups + 1;
    3340 }
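         /*
          * Typical label/fixup flow (hypothetical sketch; the label and fixup type names
          * are illustrative, not taken from this file): create a forward label, reference
          * it from an instruction via a fixup, then pin down its position once known:
          *
          *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
          *     iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
          *     ...
          *     iemNativeLabelDefine(pReNative, idxLabel, off);
          */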
    3341 
    3342 
    3343 /**
    3344  * Slow code path for iemNativeInstrBufEnsure.
    3345  */
    3346 DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
    3347 {
    3348     /* Double the buffer size till we meet the request. */
    3349     uint32_t cNew = pReNative->cInstrBufAlloc;
    3350     AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
    3351     do
    3352         cNew *= 2;
    3353     while (cNew < off + cInstrReq);
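             /* E.g. starting with cInstrBufAlloc = 16384 and off + cInstrReq = 40000,
                cNew doubles to 32768 and then 65536 before the loop exits. */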
    3354 
    3355     uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
    3356 #ifdef RT_ARCH_ARM64
    3357     uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
    3358 #else
    3359     uint32_t const cbMaxInstrBuf = _2M;
    3360 #endif
    3361     AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
    3362 
    3363     void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
    3364     AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
    3365 
    3366 #ifdef VBOX_STRICT
    3367     pReNative->offInstrBufChecked = off + cInstrReq;
    3368 #endif
    3369     pReNative->cInstrBufAlloc     = cNew;
    3370     return pReNative->pInstrBuf   = (PIEMNATIVEINSTR)pvNew;
    3371 }
    3372 
    3373 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3374 
    3375 /**
    3376  * Grows the static debug info array used during recompilation.
    3377  *
    3378  * @returns Pointer to the new debug info block; throws VBox status code on
    3379  *          failure, so no need to check the return value.
    3380  */
    3381 DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
    3382 {
    3383     uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
    3384     AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
    3385     pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
    3386     AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
    3387     pReNative->pDbgInfo      = pDbgInfo;
    3388     pReNative->cDbgInfoAlloc = cNew;
    3389     return pDbgInfo;
    3390 }
    3391 
    3392 
    3393 /**
    3394  * Adds a new, uninitialized debug info entry, returning the pointer to it.
    3395  */
    3396 DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
    3397 {
    3398     if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
    3399     { /* likely */ }
    3400     else
    3401         pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
    3402     return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
    3403 }
    3404 
    3405 
    3406 /**
    3407  * Debug Info: Adds a native offset record, if necessary.
    3408  */
    3409 static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    3410 {
    3411     PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
    3412 
    3413     /*
    3414      * Search backwards to see if we've got a similar record already.
    3415      */
    3416     uint32_t idx     = pDbgInfo->cEntries;
    3417     uint32_t idxStop = idx > 8 ? idx - 8 : 0;
    3418     while (idx-- > idxStop)
    3419         if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
    3420         {
    3421             if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
    3422                 return;
    3423             AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
    3424                        IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
    3425             break;
    3426         }
    3427 
    3428     /*
    3429      * Add it.
    3430      */
    3431     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
    3432     pEntry->NativeOffset.uType     = kIemTbDbgEntryType_NativeOffset;
    3433     pEntry->NativeOffset.offNative = off;
    3434 }
    3435 
    3436 
    3437 /**
    3438  * Debug Info: Record info about a label.
    3439  */
    3440 static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
    3441 {
    3442     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3443     pEntry->Label.uType    = kIemTbDbgEntryType_Label;
    3444     pEntry->Label.uUnused  = 0;
    3445     pEntry->Label.enmLabel = (uint8_t)enmType;
    3446     pEntry->Label.uData    = uData;
    3447 }
    3448 
    3449 
    3450 /**
    3451  * Debug Info: Record info about a threaded call.
    3452  */
    3453 static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
    3454 {
    3455     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3456     pEntry->ThreadedCall.uType       = kIemTbDbgEntryType_ThreadedCall;
    3457     pEntry->ThreadedCall.fRecompiled = fRecompiled;
    3458     pEntry->ThreadedCall.uUnused     = 0;
    3459     pEntry->ThreadedCall.enmCall     = (uint16_t)enmCall;
    3460 }
    3461 
    3462 
    3463 /**
    3464  * Debug Info: Record info about a new guest instruction.
    3465  */
    3466 static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
    3467 {
    3468     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3469     pEntry->GuestInstruction.uType   = kIemTbDbgEntryType_GuestInstruction;
    3470     pEntry->GuestInstruction.uUnused = 0;
    3471     pEntry->GuestInstruction.fExec   = fExec;
    3472 }
    3473 
    3474 
    3475 /**
    3476  * Debug Info: Record info about guest register shadowing.
    3477  */
    3478 static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
    3479                                                  uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
    3480 {
    3481     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3482     pEntry->GuestRegShadowing.uType         = kIemTbDbgEntryType_GuestRegShadowing;
    3483     pEntry->GuestRegShadowing.uUnused       = 0;
    3484     pEntry->GuestRegShadowing.idxGstReg     = enmGstReg;
    3485     pEntry->GuestRegShadowing.idxHstReg     = idxHstReg;
    3486     pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
    3487 }
    3488 
    3489 
    3490 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    3491 /**
    3492  * Debug Info: Record info about guest SIMD register shadowing.
    3493  */
    3494 static void iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
    3495                                                      uint8_t idxHstSimdReg = UINT8_MAX, uint8_t idxHstSimdRegPrev = UINT8_MAX)
    3496 {
    3497     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3498     pEntry->GuestSimdRegShadowing.uType             = kIemTbDbgEntryType_GuestSimdRegShadowing;
    3499     pEntry->GuestSimdRegShadowing.uUnused           = 0;
    3500     pEntry->GuestSimdRegShadowing.idxGstSimdReg     = enmGstSimdReg;
    3501     pEntry->GuestSimdRegShadowing.idxHstSimdReg     = idxHstSimdReg;
    3502     pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
    3503 }
    3504 # endif
    3505 
    3506 
    3507 # ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    3508 /**
    3509  * Debug Info: Record info about delayed RIP updates.
    3510  */
    3511 static void iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
    3512 {
    3513     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3514     pEntry->DelayedPcUpdate.uType         = kIemTbDbgEntryType_DelayedPcUpdate;
    3515     pEntry->DelayedPcUpdate.offPc         = offPc;
    3516     pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
    3517 }
    3518 # endif
    3519 
    3520 #endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
    3521 
    3522 
    3523 /*********************************************************************************************************************************
    3524 *   Register Allocator                                                                                                           *
    3525 *********************************************************************************************************************************/
    3526 
    3527 /**
    3528  * Register parameter indexes (indexed by argument number).
    3529  */
    3530 DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
    3531 {
    3532     IEMNATIVE_CALL_ARG0_GREG,
    3533     IEMNATIVE_CALL_ARG1_GREG,
    3534     IEMNATIVE_CALL_ARG2_GREG,
    3535     IEMNATIVE_CALL_ARG3_GREG,
    3536 #if defined(IEMNATIVE_CALL_ARG4_GREG)
    3537     IEMNATIVE_CALL_ARG4_GREG,
    3538 # if defined(IEMNATIVE_CALL_ARG5_GREG)
    3539     IEMNATIVE_CALL_ARG5_GREG,
    3540 #  if defined(IEMNATIVE_CALL_ARG6_GREG)
    3541     IEMNATIVE_CALL_ARG6_GREG,
    3542 #   if defined(IEMNATIVE_CALL_ARG7_GREG)
    3543     IEMNATIVE_CALL_ARG7_GREG,
    3544 #   endif
    3545 #  endif
    3546 # endif
    3547 #endif
    3548 };
    3549 
    3550 /**
    3551  * Call register masks indexed by argument count.
    3552  */
    3553 DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
    3554 {
    3555     0,
    3556     RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
    3557     RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
    3558     RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
    3559       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3560     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
    3561 #if defined(IEMNATIVE_CALL_ARG4_GREG)
    3562       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3563     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
    3564 # if defined(IEMNATIVE_CALL_ARG5_GREG)
    3565       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3566     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
    3567 #  if defined(IEMNATIVE_CALL_ARG6_GREG)
    3568       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3569     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
    3570     | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
    3571 #   if defined(IEMNATIVE_CALL_ARG7_GREG)
    3572       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3573     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
    3574     | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
    3575 #   endif
    3576 #  endif
    3577 # endif
    3578 #endif
    3579 };
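         /* For example, g_afIemNativeCallRegs[2] is ARG0 | ARG1; code preparing a
            two-argument helper call can use it to know which volatile registers must
            be freed up (spilled/flushed) before loading the arguments. */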
    3580 
    3581 #ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    3582 /**
    3583  * BP offset of the stack argument slots.
    3584  *
    3585  * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
    3586  * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
    3587  */
    3588 DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
    3589 {
    3590     IEMNATIVE_FP_OFF_STACK_ARG0,
    3591 # ifdef IEMNATIVE_FP_OFF_STACK_ARG1
    3592     IEMNATIVE_FP_OFF_STACK_ARG1,
    3593 # endif
    3594 # ifdef IEMNATIVE_FP_OFF_STACK_ARG2
    3595     IEMNATIVE_FP_OFF_STACK_ARG2,
    3596 # endif
    3597 # ifdef IEMNATIVE_FP_OFF_STACK_ARG3
    3598     IEMNATIVE_FP_OFF_STACK_ARG3,
    3599 # endif
    3600 };
    3601 AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
    3602 #endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
    3603 
    3604 /**
    3605  * Info about shadowed guest register values.
    3606  * @see IEMNATIVEGSTREG
    3607  */
    3608 static struct
    3609 {
    3610     /** Offset in VMCPU. */
    3611     uint32_t    off;
    3612     /** The field size. */
    3613     uint8_t     cb;
    3614     /** Name (for logging). */
    3615     const char *pszName;
    3616 } const g_aGstShadowInfo[] =
    3617 {
    3618 #define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
    3619     /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */  { CPUMCTX_OFF_AND_SIZE(rax),                "rax", },
    3620     /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */  { CPUMCTX_OFF_AND_SIZE(rcx),                "rcx", },
    3621     /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */  { CPUMCTX_OFF_AND_SIZE(rdx),                "rdx", },
    3622     /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */  { CPUMCTX_OFF_AND_SIZE(rbx),                "rbx", },
    3623     /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */  { CPUMCTX_OFF_AND_SIZE(rsp),                "rsp", },
    3624     /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */  { CPUMCTX_OFF_AND_SIZE(rbp),                "rbp", },
    3625     /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */  { CPUMCTX_OFF_AND_SIZE(rsi),                "rsi", },
    3626     /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */  { CPUMCTX_OFF_AND_SIZE(rdi),                "rdi", },
    3627     /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */  { CPUMCTX_OFF_AND_SIZE(r8),                 "r8", },
    3628     /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */  { CPUMCTX_OFF_AND_SIZE(r9),                 "r9", },
    3629     /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */  { CPUMCTX_OFF_AND_SIZE(r10),                "r10", },
    3630     /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */  { CPUMCTX_OFF_AND_SIZE(r11),                "r11", },
    3631     /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */  { CPUMCTX_OFF_AND_SIZE(r12),                "r12", },
    3632     /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */  { CPUMCTX_OFF_AND_SIZE(r13),                "r13", },
    3633     /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */  { CPUMCTX_OFF_AND_SIZE(r14),                "r14", },
    3634     /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */  { CPUMCTX_OFF_AND_SIZE(r15),                "r15", },
    3635     /* [kIemNativeGstReg_Pc] = */                       { CPUMCTX_OFF_AND_SIZE(rip),                "rip", },
    3636     /* [kIemNativeGstReg_Cr0] = */                      { CPUMCTX_OFF_AND_SIZE(cr0),                "cr0", },
    3637     /* [kIemNativeGstReg_FpuFcw] = */                   { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW),     "fcw", },
    3638     /* [kIemNativeGstReg_FpuFsw] = */                   { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW),     "fsw", },
    3639     /* [kIemNativeGstReg_SegBaseFirst + 0] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base),  "es_base", },
    3640     /* [kIemNativeGstReg_SegBaseFirst + 1] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base),  "cs_base", },
    3641     /* [kIemNativeGstReg_SegBaseFirst + 2] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base),  "ss_base", },
    3642     /* [kIemNativeGstReg_SegBaseFirst + 3] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base),  "ds_base", },
    3643     /* [kIemNativeGstReg_SegBaseFirst + 4] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base),  "fs_base", },
    3644     /* [kIemNativeGstReg_SegBaseFirst + 5] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base),  "gs_base", },
    3645     /* [kIemNativeGstReg_SegAttribFirst + 0] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u),   "es_attrib", },
    3646     /* [kIemNativeGstReg_SegAttribFirst + 1] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u),   "cs_attrib", },
    3647     /* [kIemNativeGstReg_SegAttribFirst + 2] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u),   "ss_attrib", },
    3648     /* [kIemNativeGstReg_SegAttribFirst + 3] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u),   "ds_attrib", },
    3649     /* [kIemNativeGstReg_SegAttribFirst + 4] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u),   "fs_attrib", },
    3650     /* [kIemNativeGstReg_SegAttribFirst + 5] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u),   "gs_attrib", },
    3651     /* [kIemNativeGstReg_SegLimitFirst + 0] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
    3652     /* [kIemNativeGstReg_SegLimitFirst + 1] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
    3653     /* [kIemNativeGstReg_SegLimitFirst + 2] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
    3654     /* [kIemNativeGstReg_SegLimitFirst + 3] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
    3655     /* [kIemNativeGstReg_SegLimitFirst + 4] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
    3656     /* [kIemNativeGstReg_SegLimitFirst + 5] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
    3657     /* [kIemNativeGstReg_SegSelFirst + 0] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel),      "es", },
    3658     /* [kIemNativeGstReg_SegSelFirst + 1] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel),      "cs", },
    3659     /* [kIemNativeGstReg_SegSelFirst + 2] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel),      "ss", },
    3660     /* [kIemNativeGstReg_SegSelFirst + 3] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel),      "ds", },
    3661     /* [kIemNativeGstReg_SegSelFirst + 4] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel),      "fs", },
    3662     /* [kIemNativeGstReg_SegSelFirst + 5] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel),      "gs", },
    3663     /* [kIemNativeGstReg_Cr4] = */                      { CPUMCTX_OFF_AND_SIZE(cr4),                "cr4", },
    3664     /* [kIemNativeGstReg_Xcr0] = */                     { CPUMCTX_OFF_AND_SIZE(aXcr[0]),            "xcr0", },
    3665     /* [kIemNativeGstReg_MxCsr] = */                    { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR),   "mxcsr", },
    3666     /* [kIemNativeGstReg_EFlags] = */                   { CPUMCTX_OFF_AND_SIZE(eflags),             "eflags", },
    3667 #undef CPUMCTX_OFF_AND_SIZE
    3668 };
    3669 AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
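To show how the table is consumed, here is a minimal sketch of a size-dispatched guest register load, assuming the iemNativeEmitLoadGprFromVCpuU64/U32/U16 emitter helpers from the emitter header:

    /* Illustrative sketch: load guest register enmGstReg into host register
       idxHstReg, dispatching on the field size recorded in g_aGstShadowInfo. */
    switch (g_aGstShadowInfo[enmGstReg].cb)
    {
        case sizeof(uint64_t):
            off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
            break;
        case sizeof(uint32_t):
            off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
            break;
        case sizeof(uint16_t):
            off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
            break;
        default:
            AssertFailed();
    }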
    3670 
    3671 
    3672 /** Host CPU general purpose register names. */
    3673 DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
    3674 {
    3675 #ifdef RT_ARCH_AMD64
    3676     "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
    3677 #elif RT_ARCH_ARM64
    3678     "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",  "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
    3679     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp",  "lr",  "sp/xzr",
    3680 #else
    3681 # error "port me"
    3682 #endif
    3683 };
    3684 
    3685 
    3686 DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
    3687                                                      IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
    3688 {
    3689     pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
    3690 
    3691     pReNative->Core.aHstRegs[idxReg].enmWhat        = enmWhat;
    3692     pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3693     pReNative->Core.aHstRegs[idxReg].idxVar         = idxVar;
    3694     return (uint8_t)idxReg;
    3695 }
    3696 
    3697 
    3698 #if 0 /* unused */
    3699 /**
    3700  * Tries to locate a suitable register in the given register mask.
    3701  *
    3702  * This ASSUMES the caller has done the minimal/optimal allocation checks and
    3703  * failed.
    3704  *
    3705  * @returns Host register number on success, returns UINT8_MAX on failure.
    3706  */
    3707 static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
    3708 {
    3709     Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
    3710     uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
    3711     if (fRegs)
    3712     {
    3713         /** @todo pick a better register here:    */
    3714         unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
    3715 
    3716         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
    3717         Assert(   (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
    3718                == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    3719         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    3720 
    3721         pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    3722         pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    3723         pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3724         return idxReg;
    3725     }
    3726     return UINT8_MAX;
    3727 }
    3728 #endif /* unused */
    3729 
    3730 
    3731 /**
    3732  * Locate a register, possibly freeing one up.
    3733  *
    3734  * This ASSUMES the caller has done the minimal/optimal allocation checks and
    3735  * failed.
    3736  *
    3737  * @returns Host register number on success.  Returns UINT8_MAX if no register
    3738  *          was found; the caller is expected to deal with this and raise an
    3739  *          allocation type specific status code (if desired).
    3740  *
    3741  * @throws  VBox status code if we run into trouble spilling a variable or
    3742  *          recording debug info.  Does NOT throw anything if we're out of
    3743  *          registers, though.
    3744  */
    3745 static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
    3746                                          uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
    3747 {
    3748     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
    3749     Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
    3750     Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
    3751 
    3752     /*
    3753      * Try a freed register that's shadowing a guest register.
    3754      */
    3755     uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
    3756     if (fRegs)
    3757     {
    3758         STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
    3759 
    3760 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    3761         /*
    3762          * When we have liveness information, we use it to kick out all shadowed
    3763          * guest registers that will not be needed any more in this TB.  If we're
    3764          * lucky, this may prevent us from ending up here again.
    3765          *
    3766          * Note! We must consider the previous entry here so we don't free
    3767          *       anything that the current threaded function requires (current
    3768          *       entry is produced by the next threaded function).
    3769          */
    3770         uint32_t const idxCurCall = pReNative->idxCurCall;
    3771         if (idxCurCall > 0)
    3772         {
    3773             PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
    3774 
    3775 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    3776             /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
    3777             AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
    3778             uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either the UNUSED or XCPT_OR_CALL state */
    3779 # else
    3780             /* Construct a mask of the registers not in the read or write state.
    3781                Note! We could skip writes, if they aren't from us, as this is just
    3782                      a hack to prevent trashing registers that have just been written
    3783                      or will be written when we retire the current instruction. */
    3784             uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
    3785                                  & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
    3786                                  & IEMLIVENESSBIT_MASK;
    3787 # endif
    3788             /* Merge EFLAGS. */
    3789             uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3);   /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
    3790             fTmp &= fTmp >> 2;                                  /*         CF3,Other3 = AF2,PF2 & CF2,Other2  */
    3791             fTmp &= fTmp >> 1;                                  /*             Other4 = CF3 & Other3 */
    3792             fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
    3793             fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
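            /* Annotation (added for clarity, not in the original): the seven
               EFLAGS liveness fields sit consecutively from bit
               kIemNativeGstReg_EFlags in the order Other, CF, PF, AF, ZF, SF,
               OF.  The >>3, >>2, >>1 folds above AND all seven together and
               funnel the result into the 'Other' position, so the EFLAGS bit
               survives in fToFreeMask only when every sub-flag is freeable. */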
    3794 
    3795             /* If it matches any shadowed registers. */
    3796             if (pReNative->Core.bmGstRegShadows & fToFreeMask)
    3797             {
    3798                 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
    3799                 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
    3800                 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
    3801 
    3802                 /* See if we've got any unshadowed registers we can return now. */
    3803                 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
    3804                 if (fUnshadowedRegs)
    3805                 {
    3806                     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
    3807                     return (fPreferVolatile
    3808                             ? ASMBitFirstSetU32(fUnshadowedRegs)
    3809                             : ASMBitLastSetU32(  fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3810                                                ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
    3811                          - 1;
    3812                 }
    3813             }
    3814         }
    3815 #endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
    3816 
    3817         unsigned const idxReg = (fPreferVolatile
    3818                                  ? ASMBitFirstSetU32(fRegs)
    3819                                  : ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3820                                                     ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
    3821                               - 1;
    3822 
    3823         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
    3824         Assert(   (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
    3825                == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    3826         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    3827 
    3828         pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    3829         pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    3830         pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3831         return idxReg;
    3832     }
    3833 
    3834     /*
    3835      * Try free up a variable that's in a register.
    3836      *
    3837      * We do two rounds here: first we evacuate variables that don't need to be
    3838      * saved on the stack, then in the second round we move things to the stack.
    3839      */
    3840     STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
    3841     for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
    3842     {
    3843         uint32_t fVars = pReNative->Core.bmVars;
    3844         while (fVars)
    3845         {
    3846             uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
    3847             uint8_t const  idxReg = pReNative->Core.aVars[idxVar].idxReg;
    3848             if (   idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
    3849                 && (RT_BIT_32(idxReg) & fRegMask)
    3850                 && (  iLoop == 0
    3851                     ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
    3852                     : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    3853                 && !pReNative->Core.aVars[idxVar].fRegAcquired)
    3854             {
    3855                 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
    3856                 Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
    3857                        == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    3858                 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3859                 Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
    3860                        == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
    3861 
    3862                 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    3863                 {
    3864                     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
    3865                     *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
    3866                 }
    3867 
    3868                 pReNative->Core.aVars[idxVar].idxReg    = UINT8_MAX;
    3869                 pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxReg);
    3870 
    3871                 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    3872                 pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    3873                 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3874                 return idxReg;
    3875             }
    3876             fVars &= ~RT_BIT_32(idxVar);
    3877         }
    3878     }
    3879 
    3880     return UINT8_MAX;
    3881 }
    3882 
    3883 
    3884 /**
    3885  * Reassigns a variable to a different register specified by the caller.
    3886  *
    3887  * @returns The new code buffer position.
    3888  * @param   pReNative       The native recompile state.
    3889  * @param   off             The current code buffer position.
    3890  * @param   idxVar          The variable index.
    3891  * @param   idxRegOld       The old host register number.
    3892  * @param   idxRegNew       The new host register number.
    3893  * @param   pszCaller       The caller for logging.
    3894  */
    3895 static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
    3896                                     uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
    3897 {
    3898     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    3899     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
    3900     RT_NOREF(pszCaller);
    3901 
    3902     iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
    3903 
    3904     uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
    3905     Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
    3906            pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
    3907     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
    3908 
    3909     pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
    3910     pReNative->Core.aHstRegs[idxRegNew].enmWhat        = kIemNativeWhat_Var;
    3911     pReNative->Core.aHstRegs[idxRegNew].idxVar         = idxVar;
    3912     if (fGstRegShadows)
    3913     {
    3914         pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
    3915                                                | RT_BIT_32(idxRegNew);
    3916         while (fGstRegShadows)
    3917         {
    3918             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    3919             fGstRegShadows &= ~RT_BIT_64(idxGstReg);
    3920 
    3921             Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
    3922             pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
    3923         }
    3924     }
    3925 
    3926     pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
    3927     pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
    3928     pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
    3929     return off;
    3930 }
    3931 
    3932 
    3933 /**
    3934  * Moves a variable to a different register or spills it onto the stack.
    3935  *
    3936  * This must be a stack variable (kIemNativeVarKind_Stack) because the other
    3937  * kinds can easily be recreated if needed later.
    3938  *
    3939  * @returns The new code buffer position.
    3940  * @param   pReNative       The native recompile state.
    3941  * @param   off             The current code buffer position.
    3942  * @param   idxVar          The variable index.
    3943  * @param   fForbiddenRegs  Mask of the forbidden registers.  Defaults to
    3944  *                          call-volatile registers.
    3945  */
    3946 static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
    3947                                                 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
    3948 {
    3949     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    3950     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    3951     Assert(pVar->enmKind == kIemNativeVarKind_Stack);
    3952     Assert(!pVar->fRegAcquired);
    3953 
    3954     uint8_t const idxRegOld = pVar->idxReg;
    3955     Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
    3956     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
    3957     Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
    3958     Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
    3959            == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
    3960     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3961     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
    3962            == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
    3963 
    3964 
    3965     /** @todo Add statistics on this.*/
    3966     /** @todo Implement basic variable liveness analysis (python) so variables
    3967      * can be freed immediately once no longer used.  Without it we risk
    3968      * trashing registers and stack slots for dead variables.
    3969      * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
    3970 
    3971     /*
    3972      * First try move it to a different register, as that's cheaper.
    3973      */
    3974     fForbiddenRegs |= RT_BIT_32(idxRegOld);
    3975     fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
    3976     uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
    3977     if (fRegs)
    3978     {
    3979         /* Avoid using shadow registers, if possible. */
    3980         if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
    3981             fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
    3982         unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
    3983         return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
    3984     }
    3985 
    3986     /*
    3987      * Otherwise we must spill the register onto the stack.
    3988      */
    3989     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    3990     Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
    3991            idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    3992     off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
    3993 
    3994     pVar->idxReg                            = UINT8_MAX;
    3995     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
    3996     pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxRegOld);
    3997     pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
    3998     pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
    3999     return off;
    4000 }
    4001 
    4002 
    4003 /**
    4004  * Allocates a temporary host general purpose register.
    4005  *
    4006  * This may emit code to save register content onto the stack in order to free
    4007  * up a register.
    4008  *
    4009  * @returns The host register number; throws VBox status code on failure,
    4010  *          so no need to check the return value.
    4011  * @param   pReNative       The native recompile state.
    4012  * @param   poff            Pointer to the variable with the code buffer position.
    4013  *                          This will be updated if we need to move a variable from
    4014  *                          register to stack in order to satisfy the request.
    4015  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    4016  *                          registers (@c true, default) or the other way around
    4017  *                          (@c false, for iemNativeRegAllocTmpForGuestReg()).
    4018  */
    4019 DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
    4020 {
    4021     /*
    4022      * Try find a completely unused register, preferably a call-volatile one.
    4023      */
    4024     uint8_t  idxReg;
    4025     uint32_t fRegs = ~pReNative->Core.bmHstRegs
    4026                    & ~pReNative->Core.bmHstRegsWithGstShadow
    4027                    & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
    4028     if (fRegs)
    4029     {
    4030         if (fPreferVolatile)
    4031             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
    4032                                                 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    4033         else
    4034             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    4035                                                 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    4036         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    4037         Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    4038         Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
    4039     }
    4040     else
    4041     {
    4042         idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
    4043         AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
    4044         Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
    4045     }
    4046     return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
    4047 }
    4048 
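A minimal usage sketch; iemNativeRegFreeTmp is assumed from the surrounding recompiler code (it is not part of this hunk):

    /* Illustrative sketch: grab a scratch register, use it, hand it back. */
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);            /* may emit spill code */
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x42));
    /* ... emit more code using idxTmpReg ... */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);                                  /* assumed helper */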
    4049 
    4050 /**
    4051  * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
    4052  * registers.
    4053  *
    4054  * @returns The host register number; throws VBox status code on failure,
    4055  *          so no need to check the return value.
    4056  * @param   pReNative       The native recompile state.
    4057  * @param   poff            Pointer to the variable with the code buffer position.
    4058  *                          This will be updated if we need to move a variable from
    4059  *                          register to stack in order to satisfy the request.
    4060  * @param   fRegMask        Mask of acceptable registers.
    4061  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    4062  *                          registers (@c true, default) or the other way around
    4063  *                          (@c false, for iemNativeRegAllocTmpForGuestReg()).
    4064  */
    4065 DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
    4066                                                   bool fPreferVolatile /*= true*/)
    4067 {
    4068     Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
    4069     Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
    4070 
    4071     /*
    4072      * Try find a completely unused register, preferably a call-volatile one.
    4073      */
    4074     uint8_t  idxReg;
    4075     uint32_t fRegs = ~pReNative->Core.bmHstRegs
    4076                    & ~pReNative->Core.bmHstRegsWithGstShadow
    4077                    & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
    4078                    & fRegMask;
    4079     if (fRegs)
    4080     {
    4081         if (fPreferVolatile)
    4082             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
    4083                                                 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    4084         else
    4085             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    4086                                                 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    4087         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    4088         Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    4089         Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
    4090     }
    4091     else
    4092     {
    4093         idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
    4094         AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
    4095         Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
    4096     }
    4097     return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
    4098 }
    4099 
    4100 
    4101 /**
    4102  * Allocates a temporary register for loading an immediate value into.
    4103  *
    4104  * This will emit code to load the immediate, unless there happens to be an
    4105  * unused register with the value already loaded.
    4106  *
    4107  * The caller must not modify the returned register; it is to be considered
    4108  * read-only.  Free it using iemNativeRegFreeTmpImm.
    4109  *
    4110  * @returns The host register number; throws VBox status code on failure, so no
    4111  *          need to check the return value.
    4112  * @param   pReNative       The native recompile state.
    4113  * @param   poff            Pointer to the variable with the code buffer position.
    4114  * @param   uImm            The immediate value that the register must hold upon
    4115  *                          return.
    4116  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    4117  *                          registers (@c true, default) or the other way around
    4118  *                          (@c false).
    4119  *
    4120  * @note    Reusing immediate values has not been implemented yet.
    4121  */
    4122 DECL_HIDDEN_THROW(uint8_t)
    4123 iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
    4124 {
    4125     uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
    4126     *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
    4127     return idxReg;
    4128 }
    4129 
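Usage sketch for the immediate variant, freeing with the iemNativeRegFreeTmpImm helper named in the doc comment above:

    /* Illustrative sketch: the register is read-only while held. */
    uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffffffff00000000));
    /* ... use idxImmReg strictly as a source operand ... */
    iemNativeRegFreeTmpImm(pReNative, idxImmReg);
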
    4130 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    4131 
    4132 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    4133 /**
    4134  * Helper for iemNativeLivenessGetStateByGstReg.
    4135  *
    4136  * @returns IEMLIVENESS_STATE_XXX
    4137  * @param   fMergedStateExp2    This is the RT_BIT_32() of each sub-state
    4138  *                              ORed together.
    4139  */
    4140 DECL_FORCE_INLINE(uint32_t)
    4141 iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
    4142 {
    4143     /* INPUT trumps anything else. */
    4144     if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
    4145         return IEMLIVENESS_STATE_INPUT;
    4146 
    4147     /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
    4148     if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
    4149     {
    4150         /* If not all sub-fields are clobbered they must be considered INPUT. */
    4151         if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
    4152             return IEMLIVENESS_STATE_INPUT;
    4153         return IEMLIVENESS_STATE_CLOBBERED;
    4154     }
    4155 
    4156     /* XCPT_OR_CALL trumps UNUSED. */
    4157     if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
    4158         return IEMLIVENESS_STATE_XCPT_OR_CALL;
    4159 
    4160     return IEMLIVENESS_STATE_UNUSED;
    4161 }
    4162 # endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
    4163 
    4164 
    4165 DECL_FORCE_INLINE(uint32_t)
    4166 iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
    4167 {
    4168 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    4169     return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
    4170          | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
    4171 # else
    4172     return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx)       & 1)
    4173          | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
    4174          | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
    4175          | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 2) & 8);
    4176 # endif
    4177 }
    4178 
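A worked example of the bit packing, tying it to the AssertCompile earlier in the file (IEMLIVENESS_STATE_UNUSED == 1, IEMLIVENESS_STATE_XCPT_OR_CALL == 2):

    /* Annotation: the helper returns state = (Bit1 << 1) | Bit0 for the given
       register index (plus bits 2 and 3 in the extended layout), e.g.
       Bit0=1/Bit1=0 yields 1 = IEMLIVENESS_STATE_UNUSED and Bit0=0/Bit1=1
       yields 2 = IEMLIVENESS_STATE_XCPT_OR_CALL. */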
    4179 
    4180 DECL_FORCE_INLINE(uint32_t)
    4181 iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
    4182 {
    4183     uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
    4184     if (enmGstReg == kIemNativeGstReg_EFlags)
    4185     {
    4186         /* Merge the eflags states to one. */
    4187 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    4188         uRet  = RT_BIT_32(uRet);
    4189         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
    4190         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
    4191         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
    4192         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
    4193         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
    4194         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
    4195         uRet  = iemNativeLivenessMergeExpandedEFlagsState(uRet);
    4196 # else
    4197         AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
    4198         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
    4199         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
    4200         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
    4201         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
    4202         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
    4203         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
    4204 # endif
    4205     }
    4206     return uRet;
    4207 }
    4208 
    4209 
    4210 # ifdef VBOX_STRICT
    4211 /** For assertions only; the caller must check that idxCurCall isn't zero. */
    4212 DECL_FORCE_INLINE(uint32_t)
    4213 iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
    4214 {
    4215     return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
    4216 }
    4217 # endif /* VBOX_STRICT */
    4218 
    4219 #endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
    4220 
    4221 /**
    4222  * Marks host register @a idxHstReg as containing a shadow copy of guest
    4223  * register @a enmGstReg.
    4224  *
    4225  * ASSUMES that caller has made sure @a enmGstReg is not associated with any
    4226  * host register before calling.
    4227  */
    4228 DECL_FORCE_INLINE(void)
    4229 iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
    4230 {
    4231     Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
    4232     Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    4233     Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
    4234 
    4235     pReNative->Core.aidxGstRegShadows[enmGstReg]       = idxHstReg;
    4236     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
    4237     pReNative->Core.bmGstRegShadows                   |= RT_BIT_64(enmGstReg);
    4238     pReNative->Core.bmHstRegsWithGstShadow            |= RT_BIT_32(idxHstReg);
    4239 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4240     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    4241     iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
    4242 #else
    4243     RT_NOREF(off);
    4244 #endif
    4245 }
    4246 
    4247 
    4248 /**
    4249  * Clear any guest register shadow claims from @a idxHstReg.
    4250  *
    4251  * The register does not need to be shadowing any guest registers.
    4252  */
    4253 DECL_FORCE_INLINE(void)
    4254 iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
    4255 {
    4256     Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    4257               == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
    4258            && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4259     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
    4260            == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    4261 
    4262 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4263     uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4264     if (fGstRegs)
    4265     {
    4266         Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
    4267         iemNativeDbgInfoAddNativeOffset(pReNative, off);
    4268         while (fGstRegs)
    4269         {
    4270             unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    4271             fGstRegs &= ~RT_BIT_64(iGstReg);
    4272             iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
    4273         }
    4274     }
    4275 #else
    4276     RT_NOREF(off);
    4277 #endif
    4278 
    4279     pReNative->Core.bmHstRegsWithGstShadow            &= ~RT_BIT_32(idxHstReg);
    4280     pReNative->Core.bmGstRegShadows                   &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4281     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4282 }
    4283 
    4284 
    4285 /**
    4286  * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
    4287  * and global overview flags.
    4288  */
    4289 DECL_FORCE_INLINE(void)
    4290 iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
    4291 {
    4292     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4293     Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    4294               == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
    4295            && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4296     Assert(pReNative->Core.bmGstRegShadows                    & RT_BIT_64(enmGstReg));
    4297     Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
    4298     Assert(pReNative->Core.bmHstRegsWithGstShadow             & RT_BIT_32(idxHstReg));
    4299 
    4300 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4301     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    4302     iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
    4303 #else
    4304     RT_NOREF(off);
    4305 #endif
    4306 
    4307     uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
    4308     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
    4309     if (!fGstRegShadowsNew)
    4310         pReNative->Core.bmHstRegsWithGstShadow        &= ~RT_BIT_32(idxHstReg);
    4311     pReNative->Core.bmGstRegShadows                   &= ~RT_BIT_64(enmGstReg);
    4312 }
    4313 
    4314 
    4315 #if 0 /* unused */
    4316 /**
    4317  * Clear any guest register shadow claim for @a enmGstReg.
    4318  */
    4319 DECL_FORCE_INLINE(void)
    4320 iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
    4321 {
    4322     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4323     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    4324     {
    4325         Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4326         iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
    4327     }
    4328 }
    4329 #endif
    4330 
    4331 
    4332 /**
    4333  * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
    4334  * as the new shadow of it.
    4335  *
    4336  * Unlike the other guest reg shadow helpers, this does the logging for you.
    4337  * However, the liveness state is not asserted here; the caller must do
    4338  * that.
    4339  */
    4340 DECL_FORCE_INLINE(void)
    4341 iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
    4342                                        IEMNATIVEGSTREG enmGstReg, uint32_t off)
    4343 {
    4344     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4345     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    4346     {
    4347         uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
    4348         Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4349         if (idxHstRegOld == idxHstRegNew)
    4350             return;
    4351         Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
    4352                g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
    4353         iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
    4354     }
    4355     else
    4356         Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
    4357                g_aGstShadowInfo[enmGstReg].pszName));
    4358     iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
    4359 }
    4360 
    4361 
    4362 /**
    4363  * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
    4364  * to @a idxRegTo.
    4365  */
    4366 DECL_FORCE_INLINE(void)
    4367 iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
    4368                                     IEMNATIVEGSTREG enmGstReg, uint32_t off)
    4369 {
    4370     Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
    4371     Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
    4372     Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
    4373               == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
    4374            && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4375     Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
    4376            == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
    4377     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
    4378            == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
    4379 
    4380     uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
    4381     pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows  = fGstRegShadowsFrom;
    4382     if (!fGstRegShadowsFrom)
    4383         pReNative->Core.bmHstRegsWithGstShadow          &= ~RT_BIT_32(idxRegFrom);
    4384     pReNative->Core.bmHstRegsWithGstShadow              |= RT_BIT_32(idxRegTo);
    4385     pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows   |= RT_BIT_64(enmGstReg);
    4386     pReNative->Core.aidxGstRegShadows[enmGstReg]         = idxRegTo;
    4387 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4388     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    4389     iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
    4390 #else
    4391     RT_NOREF(off);
    4392 #endif
    4393 }
    4394 
    4395 
    4396 /**
    4397  * Allocates a temporary host general purpose register for keeping a guest
    4398  * register value.
    4399  *
    4400  * We may already have a register holding the guest register value; if we
    4401  * don't, code will be emitted to do the loading.  Code may also be emitted
    4402  * if we have to free up a register to satisfy the request.
    4403  *
    4404  * @returns The host register number; throws VBox status code on failure, so no
    4405  *          need to check the return value.
    4406  * @param   pReNative       The native recompile state.
    4407  * @param   poff            Pointer to the variable with the code buffer
    4408  *                          position. This will be updated if we need to move a
    4409  *                          variable from register to stack in order to satisfy
    4410  *                          the request.
    4411  * @param   enmGstReg       The guest register that is to be updated.
    4412  * @param   enmIntendedUse  How the caller will be using the host register.
    4413  * @param   fNoVolatileRegs Set if no volatile register allowed, clear if any
    4414  *                          register is okay (default).  The ASSUMPTION here is
    4415  *                          that the caller has already flushed all volatile
    4416  *                          registers, so this is only applied if we allocate a
    4417  *                          new register.
    4418  * @param   fSkipLivenessAssert     Hack for liveness input validation of EFLAGS.
    4419  * @sa      iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
    4420  */
    4421 DECL_HIDDEN_THROW(uint8_t)
    4422 iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
    4423                                 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
    4424                                 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
    4425 {
    4426     Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
    4427 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    4428     AssertMsg(   fSkipLivenessAssert
    4429               || pReNative->idxCurCall == 0
    4430               || enmGstReg == kIemNativeGstReg_Pc
    4431               || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
    4432                   ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
    4433                   : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
    4434                   ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
    4435                   : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(  iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
    4436               ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
    4437 #endif
    4438     RT_NOREF(fSkipLivenessAssert);
    4439 #if defined(LOG_ENABLED) || defined(VBOX_STRICT)
    4440     static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
    4441 #endif
    4442     uint32_t const fRegMask = !fNoVolatileRegs
    4443                             ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
    4444                             : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    4445 
    4446     /*
    4447      * First check if the guest register value is already in a host register.
    4448      */
    4449     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    4450     {
    4451         uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
    4452         Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4453         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
    4454         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    4455 
    4456         /* It's not supposed to be allocated... */
    4457         if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
    4458         {
    4459             /*
    4460              * If the register will trash the guest shadow copy, try find a
    4461              * completely unused register we can use instead.  If that fails,
    4462              * we need to disassociate the host reg from the guest reg.
    4463              */
    4464             /** @todo would be nice to know if preserving the register is in any way helpful. */
    4465             /* If the purpose is calculations, try duplicate the register value as
    4466                we'll be clobbering the shadow. */
    4467             if (   enmIntendedUse == kIemNativeGstRegUse_Calculation
    4468                 && (  ~pReNative->Core.bmHstRegs
    4469                     & ~pReNative->Core.bmHstRegsWithGstShadow
    4470                     & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
    4471             {
    4472                 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
    4473 
    4474                 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
    4475 
    4476                 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
    4477                        g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4478                        g_apszIemNativeHstRegNames[idxRegNew]));
    4479                 idxReg = idxRegNew;
    4480             }
    4481             /* If the current register matches the restrictions, go ahead and allocate
    4482                it for the caller. */
    4483             else if (fRegMask & RT_BIT_32(idxReg))
    4484             {
    4485                 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
    4486                 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
    4487                 pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    4488                 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    4489                     Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
    4490                            g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    4491                 else
    4492                 {
    4493                     iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    4494                     Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
    4495                            g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
    4496                 }
    4497             }
    4498             /* Otherwise, allocate a register that satisfies the caller and transfer
    4499                the shadowing if compatible with the intended use.  (This basically
    4500                means the caller wants a non-volatile register (RSP push/pop scenario).) */
    4501             else
    4502             {
    4503                 Assert(fNoVolatileRegs);
    4504                 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
    4505                                                                     !fNoVolatileRegs
    4506                                                                  && enmIntendedUse == kIemNativeGstRegUse_Calculation);
    4507                 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
    4508                 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    4509                 {
    4510                     iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
    4511                     Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
    4512                            g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
    4513                            g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    4514                 }
    4515                 else
    4516                     Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
    4517                            g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4518                            g_apszIemNativeHstRegNames[idxRegNew]));
    4519                 idxReg = idxRegNew;
    4520             }
    4521         }
    4522         else
    4523         {
    4524             /*
    4525              * Oops. Shadowed guest register already allocated!
    4526              *
    4527              * Allocate a new register, copy the value and, if updating, the
    4528              * guest shadow copy assignment to the new register.
    4529              */
    4530             AssertMsg(   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
    4531                       && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
    4532                       ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
    4533                        idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
    4534 
    4535             /** @todo share register for readonly access. */
    4536             uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
    4537                                                              enmIntendedUse == kIemNativeGstRegUse_Calculation);
    4538 
    4539             if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    4540                 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
    4541 
    4542             if (   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
    4543                 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    4544                 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
    4545                        g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4546                        g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
    4547             else
    4548             {
    4549                 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
    4550                 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
    4551                        g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4552                        g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
    4553             }
    4554             idxReg = idxRegNew;
    4555         }
    4556         Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
    4557 
    4558 #ifdef VBOX_STRICT
    4559         /* Strict builds: Check that the value is correct. */
    4560         *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
    4561 #endif
    4562 
    4563         return idxReg;
    4564     }
    4565 
    4566     /*
    4567      * Allocate a new register, load it with the guest value and designate it as a shadow copy of the guest register.
    4568      */
    4569     uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
    4570 
    4571     if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    4572         *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
    4573 
    4574     if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    4575         iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
    4576     Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
    4577            g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    4578 
    4579     return idxRegNew;
    4580 }
    4581 
    4582 
    4583 /**
    4584  * Allocates a temporary host general purpose register that already holds the
    4585  * given guest register value.
    4586  *
    4587  * The use case for this function is places where the shadowing state cannot be
    4588  * modified due to branching and such.  This will fail if we don't have a
    4589  * current shadow copy handy or if it's incompatible.  The only code that will
    4590  * be emitted here is value checking code in strict builds.
    4591  *
    4592  * The intended use can only be readonly!
    4593  *
    4594  * @returns The host register number, UINT8_MAX if not present.
    4595  * @param   pReNative       The native recompile state.
    4596  * @param   poff            Pointer to the instruction buffer offset.
    4597  *                          Will be updated in strict builds if a register is
    4598  *                          found.
    4599  * @param   enmGstReg       The guest register that is to be updated.
    4600  * @note    In strict builds, this may throw instruction buffer growth failures.
    4601  *          Non-strict builds will not throw anything.
    4602  * @sa iemNativeRegAllocTmpForGuestReg
    4603  */
    4604 DECL_HIDDEN_THROW(uint8_t)
    4605 iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
    4606 {
    4607     Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
    4608 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    4609     AssertMsg(   pReNative->idxCurCall == 0
    4610               || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
    4611               || enmGstReg == kIemNativeGstReg_Pc,
    4612               ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
    4613 #endif
    4614 
    4615     /*
    4616      * First check if the guest register value is already in a host register.
    4617      */
    4618     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    4619     {
    4620         uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
    4621         Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4622         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
    4623         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    4624 
    4625         if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
    4626         {
    4627             /*
    4628              * We only do readonly use here, so easy compared to the other
    4629              * variant of this code.
    4630              */
    4631             pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
    4632             pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
    4633             pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    4634             Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
    4635                    g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
    4636 
    4637 #ifdef VBOX_STRICT
    4638             /* Strict builds: Check that the value is correct. */
    4639             *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
    4640 #else
    4641             RT_NOREF(poff);
    4642 #endif
    4643             return idxReg;
    4644         }
    4645     }
    4646 
    4647     return UINT8_MAX;
    4648 }
    4649 
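A minimal caller sketch (an illustration for this write-up, not code from the changeset): a post-branch code path that must not disturb the shadowing state asks for an existing shadow copy and handles the UINT8_MAX miss case itself.

    /* Sketch: read-only access to the PC without touching the shadowing state. */
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* ... emit read-only uses of idxPcReg here ... */
        iemNativeRegFreeTmp(pReNative, idxPcReg); /* valid: the function marked it kIemNativeWhat_Tmp */
    }
    else
    {
        /* No compatible shadow copy handy; the caller must cope without modifying
           the shadowing state, e.g. by loading the value into a scratch register. */
    }
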
    4650 
    4651 /**
    4652  * Allocates argument registers for a function call.
    4653  *
    4654  * @returns New code buffer offset on success; throws VBox status code on failure, so no
    4655  *          need to check the return value.
    4656  * @param   pReNative   The native recompile state.
    4657  * @param   off         The current code buffer offset.
    4658  * @param   cArgs       The number of arguments the function call takes.
    4659  */
    4660 DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
    4661 {
    4662     AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
    4663                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
    4664     Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
    4665     Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
    4666 
    4667     if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
    4668         cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
    4669     else if (cArgs == 0)
    4670         return off;
    4671 
    4672     /*
    4673      * Are we lucky and all registers are free and not shadowing anything?
    4674      */
    4675     if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
    4676         for (uint32_t i = 0; i < cArgs; i++)
    4677         {
    4678             uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
    4679             pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
    4680             pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    4681             Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    4682         }
    4683     /*
    4684      * Okay, not lucky so we have to free up the registers.
    4685      */
    4686     else
    4687         for (uint32_t i = 0; i < cArgs; i++)
    4688         {
    4689             uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
    4690             if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
    4691             {
    4692                 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
    4693                 {
    4694                     case kIemNativeWhat_Var:
    4695                     {
    4696                         uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
    4697                         IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    4698                         AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
    4699                                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
    4700                         Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
    4701 
    4702                         if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
    4703                             pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
    4704                         else
    4705                         {
    4706                             off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
    4707                             Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    4708                         }
    4709                         break;
    4710                     }
    4711 
    4712                     case kIemNativeWhat_Tmp:
    4713                     case kIemNativeWhat_Arg:
    4714                     case kIemNativeWhat_rc:
    4715                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
    4716                     default:
    4717                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
    4718                 }
    4719 
    4720             }
    4721             if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
    4722             {
    4723                 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
    4724                 Assert(   (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
    4725                        == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    4726                 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    4727                 pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    4728                 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    4729             }
    4730             else
    4731                 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    4732             pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
    4733             pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    4734         }
    4735     pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
    4736     return off;
    4737 }
    4738 
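A rough usage sketch (assumed call pattern, not lifted from the changeset): before emitting a two-argument helper call, the argument registers are reserved so nothing else occupies them.

    /* Reserve the first two call argument registers; any current users are
       spilled or detached as implemented above. */
    off = iemNativeRegAllocArgs(pReNative, off, 2 /*cArgs*/);
    /* g_aidxIemNativeCallRegs[0..1] are now marked kIemNativeWhat_Arg and can
       be loaded with the actual argument values. */
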
    4739 
    4740 DECL_HIDDEN_THROW(uint8_t)  iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
    4741 
    4742 
    4743 #if 0
    4744 /**
    4745  * Frees a register assignment of any type.
    4746  *
    4747  * @param   pReNative       The native recompile state.
    4748  * @param   idxHstReg       The register to free.
    4749  *
    4750  * @note    Does not update variables.
    4751  */
    4752 DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
    4753 {
    4754     Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4755     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
    4756     Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
    4757     Assert(   pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
    4758            || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
    4759            || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
    4760            || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
    4761     Assert(   pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
    4762            || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
    4763            || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
    4764     Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    4765            == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    4766     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
    4767            == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    4768 
    4769     pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxHstReg);
    4770     /* no flushing, right:
    4771     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    4772     pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4773     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4774     */
    4775 }
    4776 #endif
    4777 
    4778 
    4779 /**
    4780  * Frees a temporary register.
    4781  *
    4782  * Any shadow copies of guest registers assigned to the host register will not
    4783  * be flushed by this operation.
    4784  */
    4785 DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
    4786 {
    4787     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
    4788     Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
    4789     pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    4790     Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
    4791            g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    4792 }
    4793 
    4794 
    4795 /**
    4796  * Frees a temporary immediate register.
    4797  *
    4798  * It is assumed that the call has not modified the register, so it still holds
    4799  * the same value as when it was allocated via iemNativeRegAllocTmpImm().
    4800  */
    4801 DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
    4802 {
    4803     iemNativeRegFreeTmp(pReNative, idxHstReg);
    4804 }
    4805 
    4806 
    4807 /**
    4808  * Frees a register assigned to a variable.
    4809  *
    4810  * The register will be disassociated from the variable.
    4811  */
    4812 DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
    4813 {
    4814     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
    4815     Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
    4816     uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
    4817     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    4818     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
    4819 
    4820     pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
    4821     pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    4822     if (!fFlushShadows)
    4823         Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
    4824                g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
    4825     else
    4826     {
    4827         pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    4828         uint64_t const fGstRegShadowsOld        = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4829         pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4830         pReNative->Core.bmGstRegShadows        &= ~fGstRegShadowsOld;
    4831         uint64_t       fGstRegShadows           = fGstRegShadowsOld;
    4832         while (fGstRegShadows)
    4833         {
    4834             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    4835             fGstRegShadows &= ~RT_BIT_64(idxGstReg);
    4836 
    4837             Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
    4838             pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
    4839         }
    4840         Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
    4841                g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
    4842     }
    4843 }
    4844 
    4845 
    4846 /**
    4847  * Called right before emitting a call instruction to move anything important
    4848  * out of call-volatile registers, free and flush the call-volatile registers,
    4849  * optionally freeing argument variables.
    4850  *
    4851  * @returns New code buffer offset; throws VBox status code on failure.
    4852  * @param   pReNative       The native recompile state.
    4853  * @param   off             The code buffer offset.
    4854  * @param   cArgs           The number of arguments the function call takes.
    4855  *                          It is presumed that the host register part of these has
    4856  *                          been allocated as such already and won't need moving,
    4857  *                          just freeing.
    4858  * @param   fKeepVars       Mask of variables that should keep their register
    4859  *                          assignments.  Caller must take care to handle these.
    4860  */
    4861 DECL_HIDDEN_THROW(uint32_t)
    4862 iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
    4863 {
    4864     Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
    4865 
    4866     /* fKeepVars will reduce this mask. */
    4867     uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    4868 
    4869     /*
    4870      * Move anything important out of volatile registers.
    4871      */
    4872     if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
    4873         cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
    4874     uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
    4875 #ifdef IEMNATIVE_REG_FIXED_TMP0
    4876                          & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
    4877 #endif
    4878 #ifdef IEMNATIVE_REG_FIXED_TMP1
    4879                          & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
    4880 #endif
    4881 #ifdef IEMNATIVE_REG_FIXED_PC_DBG
    4882                          & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
    4883 #endif
    4884                          & ~g_afIemNativeCallRegs[cArgs];
    4885 
    4886     fRegsToMove &= pReNative->Core.bmHstRegs;
    4887     if (!fRegsToMove)
    4888     { /* likely */ }
    4889     else
    4890     {
    4891         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
    4892         while (fRegsToMove != 0)
    4893         {
    4894             unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
    4895             fRegsToMove &= ~RT_BIT_32(idxReg);
    4896 
    4897             switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
    4898             {
    4899                 case kIemNativeWhat_Var:
    4900                 {
    4901                     uint8_t const       idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
    4902                     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    4903                     PIEMNATIVEVAR const pVar   = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    4904                     Assert(pVar->idxReg == idxReg);
    4905                     if (!(RT_BIT_32(idxVar) & fKeepVars))
    4906                     {
    4907                         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
    4908                                idxVar, pVar->enmKind, pVar->idxReg));
    4909                         if (pVar->enmKind != kIemNativeVarKind_Stack)
    4910                             pVar->idxReg = UINT8_MAX;
    4911                         else
    4912                             off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
    4913                     }
    4914                     else
    4915                         fRegsToFree &= ~RT_BIT_32(idxReg);
    4916                     continue;
    4917                 }
    4918 
    4919                 case kIemNativeWhat_Arg:
    4920                     AssertMsgFailed(("What?!?: %u\n", idxReg));
    4921                     continue;
    4922 
    4923                 case kIemNativeWhat_rc:
    4924                 case kIemNativeWhat_Tmp:
    4925                     AssertMsgFailed(("Missing free: %u\n", idxReg));
    4926                     continue;
    4927 
    4928                 case kIemNativeWhat_FixedTmp:
    4929                 case kIemNativeWhat_pVCpuFixed:
    4930                 case kIemNativeWhat_pCtxFixed:
    4931                 case kIemNativeWhat_PcShadow:
    4932                 case kIemNativeWhat_FixedReserved:
    4933                 case kIemNativeWhat_Invalid:
    4934                 case kIemNativeWhat_End:
    4935                     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
    4936             }
    4937             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
    4938         }
    4939     }
    4940 
    4941     /*
    4942      * Do the actual freeing.
    4943      */
    4944     if (pReNative->Core.bmHstRegs & fRegsToFree)
    4945         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
    4946                pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
    4947     pReNative->Core.bmHstRegs &= ~fRegsToFree;
    4948 
    4949     /* If there are guest register shadows in any call-volatile register, we
    4950        have to clear the corresponding guest register masks for each register. */
    4951     uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
    4952     if (fHstRegsWithGstShadow)
    4953     {
    4954         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
    4955                pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
    4956         pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
    4957         do
    4958         {
    4959             unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
    4960             fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    4961 
    4962             AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
    4963             pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    4964             pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    4965         } while (fHstRegsWithGstShadow != 0);
    4966     }
    4967 
    4968     return off;
    4969 }
    4970 
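Taken together with iemNativeRegAllocArgs above, the emission sequence around a helper call looks roughly like this (a sketch; the call-emission step itself is outside this excerpt and only paraphrased):

    off = iemNativeRegAllocArgs(pReNative, off, cArgs);                 /* claim argument registers */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cArgs); /* evacuate other volatiles */
    /* ... load the argument registers and emit the actual call instruction ... */
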
    4971 
    4972 /**
    4973  * Flushes a set of guest register shadow copies.
    4974  *
    4975  * This is usually done after calling a threaded function or a C-implementation
    4976  * of an instruction.
    4977  *
    4978  * @param   pReNative       The native recompile state.
    4979  * @param   fGstRegs        Set of guest registers to flush.
    4980  */
    4981 DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
    4982 {
    4983     /*
    4984      * Reduce the mask by what's currently shadowed
    4985      */
    4986     uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
    4987     fGstRegs &= bmGstRegShadowsOld;
    4988     if (fGstRegs)
    4989     {
    4990         uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
    4991         Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
    4992         pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
    4993         if (bmGstRegShadowsNew)
    4994         {
    4995             /*
    4996              * Partial.
    4997              */
    4998             do
    4999             {
    5000                 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    5001                 uint8_t const  idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    5002                 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
    5003                 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
    5004                 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    5005 
    5006                 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
    5007                 fGstRegs &= ~fInThisHstReg;
    5008                 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
    5009                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
    5010                 if (!fGstRegShadowsNew)
    5011                     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    5012             } while (fGstRegs != 0);
    5013         }
    5014         else
    5015         {
    5016             /*
    5017              * Clear all.
    5018              */
    5019             do
    5020             {
    5021                 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    5022                 uint8_t const  idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    5023                 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
    5024                 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
    5025                 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    5026 
    5027                 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
    5028                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    5029             } while (fGstRegs != 0);
    5030             pReNative->Core.bmHstRegsWithGstShadow = 0;
    5031         }
    5032     }
    5033 }
    5034 
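All the flush loops in this file walk sparse bitmasks with the same IPRT bit-scan idiom; in isolation (generic illustration, not new code from the changeset) it reads:

    uint64_t fTodo = fMask;                                   /* mask of pending items           */
    while (fTodo)
    {
        unsigned const idxBit = ASMBitFirstSetU64(fTodo) - 1; /* 1-based index of lowest set bit */
        fTodo &= ~RT_BIT_64(idxBit);                          /* clear it so the loop terminates */
        /* ... process item idxBit ... */
    }
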
    5035 
    5036 /**
    5037  * Flushes guest register shadow copies held by a set of host registers.
    5038  *
    5039  * This is used with the TLB lookup code for ensuring that we don't carry on
    5040  * with any guest shadows in volatile registers, as these will get corrupted by
    5041  * a TLB miss.
    5042  *
    5043  * @param   pReNative       The native recompile state.
    5044  * @param   fHstRegs        Set of host registers to flush guest shadows for.
    5045  */
    5046 DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
    5047 {
    5048     /*
    5049      * Reduce the mask by what's currently shadowed.
    5050      */
    5051     uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
    5052     fHstRegs &= bmHstRegsWithGstShadowOld;
    5053     if (fHstRegs)
    5054     {
    5055         uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
    5056         Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
    5057                fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
    5058         pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
    5059         if (bmHstRegsWithGstShadowNew)
    5060         {
    5061             /*
    5062              * Partial (likely).
    5063              */
    5064             uint64_t fGstShadows = 0;
    5065             do
    5066             {
    5067                 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    5068                 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
    5069                 Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    5070                        == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    5071 
    5072                 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    5073                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    5074                 fHstRegs &= ~RT_BIT_32(idxHstReg);
    5075             } while (fHstRegs != 0);
    5076             pReNative->Core.bmGstRegShadows &= ~fGstShadows;
    5077         }
    5078         else
    5079         {
    5080             /*
    5081              * Clear all.
    5082              */
    5083             do
    5084             {
    5085                 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    5086                 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
    5087                 Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    5088                        == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    5089 
    5090                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    5091                 fHstRegs &= ~RT_BIT_32(idxHstReg);
    5092             } while (fHstRegs != 0);
    5093             pReNative->Core.bmGstRegShadows = 0;
    5094         }
    5095     }
    5096 }
    5097 
    5098 
    5099 /**
    5100  * Restores guest shadow copies in volatile registers.
    5101  *
    5102  * This is used after calling a helper function (think TLB miss) to restore the
    5103  * register state of volatile registers.
    5104  *
    5105  * @param   pReNative               The native recompile state.
    5106  * @param   off                     The code buffer offset.
    5107  * @param   fHstRegsActiveShadows   Set of host registers which are allowed to
    5108  *                                  be active (allocated) w/o asserting. Hack.
    5109  * @see     iemNativeVarSaveVolatileRegsPreHlpCall(),
    5110  *          iemNativeVarRestoreVolatileRegsPostHlpCall()
    5111  */
    5112 DECL_HIDDEN_THROW(uint32_t)
    5113 iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
    5114 {
    5115     uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    5116     if (fHstRegs)
    5117     {
    5118         Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
    5119         do
    5120         {
    5121             unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    5122 
    5123             /* It's not fatal if a register is active holding a variable that
    5124                shadows a guest register, ASSUMING all pending guest register
    5125                writes were flushed prior to the helper call. However, we'll be
    5126                emitting duplicate restores, so it wastes code space. */
    5127             Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
    5128             RT_NOREF(fHstRegsActiveShadows);
    5129 
    5130             uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    5131             Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
    5132             AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
    5133                        IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
    5134 
    5135             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    5136             off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
    5137 
    5138             fHstRegs &= ~RT_BIT_32(idxHstReg);
    5139         } while (fHstRegs != 0);
    5140     }
    5141     return off;
    5142 }
    5143 
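Paired with the variable save/restore helpers named in the @see tag, a TLB-miss style helper call would be bracketed roughly like this (sketch only; those two helpers are referenced by name above and their exact signatures are assumptions here):

    /* Before the helper call: save variables living in volatile registers. */
    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);     /* assumed signature */
    /* ... emit the helper call ... */
    /* After it returns: reload the variables and re-establish guest shadows. */
    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave); /* assumed signature */
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
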
    5144      86 
    5145      87 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    5146      88 # if defined(IEMNATIVE_REG_FIXED_PC_DBG)
    5147         static uint32_t iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
              89 DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    5148      90 {
    5149      91     /* Compare the shadow with the context value, they should match. */
     
    5154      96 }
    5155      97 # endif
    5156 
    5157 /**
    5158  * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
    5159  */
    5160 static uint32_t
    5161 iemNativeEmitPcWriteback(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    5162 {
    5163     if (pReNative->Core.offPc)
    5164     {
    5165 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    5166         iemNativeDbgInfoAddNativeOffset(pReNative, off);
    5167         iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
    5168 # endif
    5169 
    5170 # ifndef IEMNATIVE_REG_FIXED_PC_DBG
    5171         /* Allocate a temporary PC register. */
    5172         uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    5173 
    5174         /* Perform the addition and store the result. */
    5175         off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
    5176         off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    5177 
    5178         /* Free but don't flush the PC register. */
    5179         iemNativeRegFreeTmp(pReNative, idxPcReg);
    5180 # else
    5181         /* Compare the shadow with the context value, they should match. */
    5182         off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
    5183         off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
    5184 # endif
    5185 
    5186         STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
    5187         pReNative->Core.offPc                 = 0;
    5188         pReNative->Core.cInstrPcUpdateSkipped = 0;
    5189     }
    5190 # if 0 /*def IEMNATIVE_WITH_TB_DEBUG_INFO*/
    5191     else
    5192     {
    5193         iemNativeDbgInfoAddNativeOffset(pReNative, off);
    5194         iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc);
    5195     }
    5196 # endif
    5197 
    5198     return off;
    5199 }
    5200 #endif
    5201 
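For context (a sketch of the surrounding mechanism inferred from the fields used above, not code appearing in this excerpt): instruction emitters defer the RIP store by bumping two counters, and the writeback above settles the accumulated delta.

    /* Per instruction, instead of storing the new RIP immediately: */
    pReNative->Core.offPc                 += cbInstr;   /* cbInstr is a hypothetical local here     */
    pReNative->Core.cInstrPcUpdateSkipped += 1;         /* feeds StatNativePcUpdateDelayed above    */
    /* ... later, before anything that needs an accurate guest RIP: */
    off = iemNativeEmitPcWriteback(pReNative, off);     /* adds offPc to rip and resets both fields */
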
    5202 
    5203 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    5204 
    5205 
    5206 /*********************************************************************************************************************************
    5207 *   SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge)                            *
    5208 *********************************************************************************************************************************/
    5209 
    5210 /**
    5211  * Info about shadowed guest SIMD register values.
    5212  * @see IEMNATIVEGSTSIMDREG
    5213  */
    5214 static struct
    5215 {
    5216     /** Offset in VMCPU of XMM (low 128-bit) registers. */
    5217     uint32_t    offXmm;
    5218     /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
    5219     uint32_t    offYmm;
    5220     /** Name (for logging). */
    5221     const char *pszName;
    5222 } const g_aGstSimdShadowInfo[] =
    5223 {
    5224 #define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
    5225                                          (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
    5226     /* [kIemNativeGstSimdReg_SimdRegFirst +  0] = */  { CPUMCTX_OFF_AND_SIZE(0),  "ymm0",  },
    5227     /* [kIemNativeGstSimdReg_SimdRegFirst +  1] = */  { CPUMCTX_OFF_AND_SIZE(1),  "ymm1",  },
    5228     /* [kIemNativeGstSimdReg_SimdRegFirst +  2] = */  { CPUMCTX_OFF_AND_SIZE(2),  "ymm2",  },
    5229     /* [kIemNativeGstSimdReg_SimdRegFirst +  3] = */  { CPUMCTX_OFF_AND_SIZE(3),  "ymm3",  },
    5230     /* [kIemNativeGstSimdReg_SimdRegFirst +  4] = */  { CPUMCTX_OFF_AND_SIZE(4),  "ymm4",  },
    5231     /* [kIemNativeGstSimdReg_SimdRegFirst +  5] = */  { CPUMCTX_OFF_AND_SIZE(5),  "ymm5",  },
    5232     /* [kIemNativeGstSimdReg_SimdRegFirst +  6] = */  { CPUMCTX_OFF_AND_SIZE(6),  "ymm6",  },
    5233     /* [kIemNativeGstSimdReg_SimdRegFirst +  7] = */  { CPUMCTX_OFF_AND_SIZE(7),  "ymm7",  },
    5234     /* [kIemNativeGstSimdReg_SimdRegFirst +  8] = */  { CPUMCTX_OFF_AND_SIZE(8),  "ymm8",  },
    5235     /* [kIemNativeGstSimdReg_SimdRegFirst +  9] = */  { CPUMCTX_OFF_AND_SIZE(9),  "ymm9",  },
    5236     /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */  { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
    5237     /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */  { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
    5238     /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */  { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
    5239     /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */  { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
    5240     /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */  { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
    5241     /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */  { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
    5242 #undef CPUMCTX_OFF_AND_SIZE
    5243 };
    5244 AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
    5245 
    5246 
    5247 #ifdef LOG_ENABLED
    5248 /** Host CPU SIMD register names. */
    5249 DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
    5250 {
    5251 #ifdef RT_ARCH_AMD64
    5252     "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
    5253 #elif defined(RT_ARCH_ARM64)
    5254     "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",  "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    5255     "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
    5256 #else
    5257 # error "port me"
    5258 #endif
    5259 };
    5260 #endif
    5261 
    5262 
    5263 DECL_FORCE_INLINE(uint8_t) iemNativeSimdRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdReg,
    5264                                                          IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
    5265 {
    5266     pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
    5267 
    5268     pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat        = enmWhat;
    5269     pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
    5270     RT_NOREF(idxVar);
    5271     return idxSimdReg;
    5272 }
    5273 
    5274 
    5275 /**
    5276  * Frees a temporary SIMD register.
    5277  *
    5278  * Any shadow copies of guest registers assigned to the host register will not
    5279  * be flushed by this operation.
    5280  */
    5281 DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
    5282 {
    5283     Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
    5284     Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
    5285     pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
    5286     Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
    5287            g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
    5288 }
    5289 
    5290 
    5291 /**
    5292  * Emits code to flush a pending write of the given SIMD register, if any, and also flushes the guest to host SIMD register association.
    5293  *
    5294  * @returns New code buffer offset.
    5295  * @param   pReNative       The native recompile state.
    5296  * @param   off             Current code buffer position.
    5297  * @param   enmGstSimdReg   The guest SIMD register to flush.
    5298  */
    5299 static uint32_t iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
    5300 {
    5301     uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
    5302 
    5303     Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
    5304            g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
    5305            IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
    5306            IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
    5307 
    5308     if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
    5309     {
    5310         Assert(   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
    5311                || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
    5312         off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
    5313     }
    5314 
    5315     if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
    5316     {
    5317         Assert(   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
    5318                || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
    5319         off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
    5320     }
    5321 
    5322     IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
    5323     return off;
    5324 }
    5325 
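A hypothetical call site: before emitting code that inspects the in-memory guest YMM0 state, any pending shadow write has to be committed first.

    /* Commit dirty low/high 128-bit halves of ymm0 back to the CPU context, if any. */
    off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0) /* ymm0 */);
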
    5326 
    5327 /**
    5328  * Locate a register, possibly freeing one up.
    5329  *
    5330  * This ASSUMES the caller has done the minimal/optimal allocation checks and
    5331  * failed.
    5332  *
    5333  * @returns Host register number on success. Returns UINT8_MAX if no registers
    5334  *          were found; the caller is supposed to deal with this and raise an
    5335  *          allocation type specific status code (if desired).
    5336  *
    5337  * @throws  VBox status code if we run into trouble spilling a variable or
    5338  *          recording debug info.  Does NOT throw anything if we're out of
    5339  *          registers, though.
    5340  */
    5341 static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
    5342                                              uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
    5343 {
    5344     //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
    5345     Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
    5346     Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
    5347 
    5348     /*
    5349      * Try a freed register that's shadowing a guest register.
    5350      */
    5351     uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
    5352     if (fRegs)
    5353     {
    5354         //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
    5355 
    5356 #if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
    5357         /*
    5358          * When we have liveness information, we use it to kick out all shadowed
    5359          * guest registers that will not be needed any more in this TB.  If we're
    5360          * lucky, this may prevent us from ending up here again.
    5361          *
    5362          * Note! We must consider the previous entry here so we don't free
    5363          *       anything that the current threaded function requires (current
    5364          *       entry is produced by the next threaded function).
    5365          */
    5366         uint32_t const idxCurCall = pReNative->idxCurCall;
    5367         if (idxCurCall > 0)
    5368         {
    5369             PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
    5370 
    5371 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    5372             /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
    5373             AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
    5374             uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
    5375 # else
    5376             /* Construct a mask of the registers not in the read or write state.
    5377                Note! We could skip writes, if they aren't from us, as this is just
    5378                      a hack to prevent trashing registers that have just been written
    5379                      or will be written when we retire the current instruction. */
    5380             uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
    5381                                  & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
    5382                                  & IEMLIVENESSBIT_MASK;
    5383 # endif
    5384             /* If it matches any shadowed registers. */
    5385             if (pReNative->Core.bmGstRegShadows & fToFreeMask)
    5386             {
    5387                 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
    5388                 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
    5389                 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
    5390 
    5391                 /* See if we've got any unshadowed registers we can return now. */
    5392                 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
    5393                 if (fUnshadowedRegs)
    5394                 {
    5395                     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
    5396                     return (fPreferVolatile
    5397                             ? ASMBitFirstSetU32(fUnshadowedRegs)
    5398                             : ASMBitLastSetU32(  fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    5399                                                ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
    5400                          - 1;
    5401                 }
    5402             }
    5403         }
    5404 #endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
    5405 
    5406         unsigned const idxReg = (fPreferVolatile
    5407                                  ? ASMBitFirstSetU32(fRegs)
    5408                                  : ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
    5409                                                     ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
    5410                               - 1;
    5411 
    5412         Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
    5413         Assert(   (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
    5414                == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
    5415         Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
    5416         Assert(pReNative->Core.aHstSimdRegs[idxReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
    5417 
    5418         /* We need to flush any pending guest register writes this host SIMD register shadows. */
    5419         uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
    5420         uint32_t idxGstSimdReg = 0;
    5421         do
    5422         {
    5423             if (fGstRegShadows & 0x1)
    5424                 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
    5425             Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
    5426             idxGstSimdReg++;
    5427             fGstRegShadows >>= 1;
    5428         } while (fGstRegShadows);
    5429 
    5430         pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    5431         pReNative->Core.bmGstSimdRegShadows        &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
    5432         pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
    5433         pReNative->Core.aHstSimdRegs[idxReg].enmLoaded      = kIemNativeGstSimdRegLdStSz_Invalid;
    5434         return idxReg;
    5435     }
    5436 
    5437     /*
    5438      * Try free up a variable that's in a register.
    5439      *
    5440      * We do two rounds here, first evacuating variables that don't need to be
    5441      * saved on the stack, then in the second round moving things to the stack.
    5442      */
    5443     //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
    5444     AssertReleaseFailed(); /** @todo No variable support right now. */
    5445 #if 0
    5446     for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
    5447     {
    5448         uint32_t fVars = pReNative->Core.bmSimdVars;
    5449         while (fVars)
    5450         {
    5451             uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
    5452             uint8_t const  idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
    5453             if (   idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
    5454                 && (RT_BIT_32(idxReg) & fRegMask)
    5455                 && (  iLoop == 0
    5456                     ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
    5457                     : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    5458                 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
    5459             {
    5460                 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
    5461                 Assert(   (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
    5462                        == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
    5463                 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    5464                 Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
    5465                        == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
    5466 
    5467                 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    5468                 {
    5469                     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
    5470                     *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
    5471                 }
    5472 
    5473                 pReNative->Core.aSimdVars[idxVar].idxReg    = UINT8_MAX;
    5474                 pReNative->Core.bmHstSimdRegs              &= ~RT_BIT_32(idxReg);
    5475 
    5476                 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    5477                 pReNative->Core.bmGstSimdRegShadows        &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
    5478                 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
    5479                 return idxReg;
    5480             }
    5481             fVars &= ~RT_BIT_32(idxVar);
    5482         }
    5483     }
    5484 #endif
    5485 
    5486     AssertFailed();
    5487     return UINT8_MAX;
    5488 }
    5489 
    5490 
    5491 /**
    5492  * Marks host SIMD register @a idxHstSimdReg as containing a shadow copy of guest
    5493  * SIMD register @a enmGstSimdReg.
    5494  *
    5495  * ASSUMES that caller has made sure @a enmGstSimdReg is not associated with any
    5496  * host register before calling.
    5497  */
    5498 DECL_FORCE_INLINE(void)
    5499 iemNativeSimdRegMarkAsGstSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
    5500 {
    5501     Assert(!(pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg)));
    5502     Assert(!pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
    5503     Assert((unsigned)enmGstSimdReg < (unsigned)kIemNativeGstSimdReg_End);
    5504 
    5505     pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg]       = idxHstSimdReg;
    5506     pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
    5507     pReNative->Core.bmGstSimdRegShadows                        |= RT_BIT_64(enmGstSimdReg);
    5508     pReNative->Core.bmHstSimdRegsWithGstShadow                 |= RT_BIT_32(idxHstSimdReg);
    5509 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    5510     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    5511     iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxHstSimdReg);
    5512 #else
    5513     RT_NOREF(off);
    5514 #endif
    5515 }
    5516 
    5517 
    5518 /**
    5519  * Transfers the guest SIMD register shadow claims of @a enmGstSimdReg from @a idxSimdRegFrom
    5520  * to @a idxSimdRegTo.
    5521  */
    5522 DECL_FORCE_INLINE(void)
    5523 iemNativeSimdRegTransferGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdRegFrom, uint8_t idxSimdRegTo,
    5524                                             IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
    5525 {
    5526     Assert(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
    5527     Assert(pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] == idxSimdRegFrom);
    5528     Assert(      (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows)
    5529               == pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows
    5530            && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    5531     Assert(   (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows)
    5532            == pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows);
    5533     Assert(   RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdRegFrom))
    5534            == RT_BOOL(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows));
    5535     Assert(   pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded
    5536            == pReNative->Core.aHstSimdRegs[idxSimdRegTo].enmLoaded);
    5537 
    5538 
    5539     uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstSimdReg);
    5540     pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows  = fGstRegShadowsFrom;
    5541     if (!fGstRegShadowsFrom)
    5542     {
    5543         pReNative->Core.bmHstSimdRegsWithGstShadow               &= ~RT_BIT_32(idxSimdRegFrom);
    5544         pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded    = kIemNativeGstSimdRegLdStSz_Invalid;
    5545     }
    5546     pReNative->Core.bmHstSimdRegsWithGstShadow                |= RT_BIT_32(idxSimdRegTo);
    5547     pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
    5548     pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg]       = idxSimdRegTo;
    5549 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    5550     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    5551     iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxSimdRegTo, idxSimdRegFrom);
    5552 #else
    5553     RT_NOREF(off);
    5554 #endif
    5555 }
    5556 
    5557 
    5558 /**
    5559  * Clear any guest register shadow claims from @a idxHstSimdReg.
    5560  *
    5561  * The register does not need to be shadowing any guest registers.
    5562  */
    5563 DECL_FORCE_INLINE(void)
    5564 iemNativeSimdRegClearGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, uint32_t off)
    5565 {
    5566     Assert(      (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
    5567               == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows
    5568            && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
    5569     Assert(   RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstSimdReg))
    5570            == RT_BOOL(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
    5571     Assert(   !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyLo128)
    5572            && !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyHi128));
    5573 
    5574 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    5575     uint64_t fGstRegs = pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
    5576     if (fGstRegs)
    5577     {
    5578         Assert(fGstRegs < RT_BIT_64(kIemNativeGstSimdReg_End));
    5579         iemNativeDbgInfoAddNativeOffset(pReNative, off);
    5580         while (fGstRegs)
    5581         {
    5582             unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    5583             fGstRegs &= ~RT_BIT_64(iGstReg);
    5584             iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, (IEMNATIVEGSTSIMDREG)iGstReg, UINT8_MAX, idxHstSimdReg);
    5585         }
    5586     }
    5587 #else
    5588     RT_NOREF(off);
    5589 #endif
    5590 
    5591     pReNative->Core.bmHstSimdRegsWithGstShadow        &= ~RT_BIT_32(idxHstSimdReg);
    5592     pReNative->Core.bmGstSimdRegShadows               &= ~pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
    5593     pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
    5594     pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded      = kIemNativeGstSimdRegLdStSz_Invalid;
    5595 }
    5596 
    5597 
    5598 /**
    5599  * Flushes a set of guest register shadow copies.
    5600  *
    5601  * This is usually done after calling a threaded function or a C-implementation
    5602  * of an instruction.
    5603  *
    5604  * @param   pReNative       The native recompile state.
    5605  * @param   fGstSimdRegs    Set of guest SIMD registers to flush.
    5606  */
    5607 DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
    5608 {
    5609     /*
    5610      * Reduce the mask by what's currently shadowed
    5611      */
    5612     uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
    5613     fGstSimdRegs &= bmGstSimdRegShadows;
    5614     if (fGstSimdRegs)
    5615     {
    5616         uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
    5617         Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
    5618         pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
    5619         if (bmGstSimdRegShadowsNew)
    5620         {
    5621             /*
    5622              * Partial.
    5623              */
    5624             do
    5625             {
    5626                 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
    5627                 uint8_t const  idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
    5628                 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
    5629                 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
    5630                 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    5631                 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
    5632 
    5633                 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
    5634                 fGstSimdRegs &= ~fInThisHstReg;
    5635                 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
    5636                 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
    5637                 if (!fGstRegShadowsNew)
    5638                 {
    5639                     pReNative->Core.bmHstSimdRegsWithGstShadow        &= ~RT_BIT_32(idxHstReg);
    5640                     pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded  = kIemNativeGstSimdRegLdStSz_Invalid;
    5641                 }
    5642             } while (fGstSimdRegs != 0);
    5643         }
    5644         else
    5645         {
    5646             /*
    5647              * Clear all.
    5648              */
    5649             do
    5650             {
    5651                 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
    5652                 uint8_t const  idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
    5653                 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
    5654                 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
    5655                 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    5656                 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
    5657 
    5658                 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
    5659                 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
    5660                 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded      = kIemNativeGstSimdRegLdStSz_Invalid;
    5661             } while (fGstSimdRegs != 0);
    5662             pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
    5663         }
    5664     }
    5665 }
    5666 
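A minimal usage sketch for the flush function above; the register indices are illustrative and IEMNATIVEGSTSIMDREG_SIMD is the same index-to-enum mapping macro used by iemNativeRegFlushPendingWrites further down:

    /* Hypothetical call site: drop the host shadow copies of the first two
       guest SIMD registers so stale values are not reused afterwards. */
    uint64_t const fToFlush = RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(0))
                            | RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(1));
    iemNativeSimdRegFlushGuestShadows(pReNative, fToFlush);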
    5667 
    5668 /**
    5669  * Allocates a temporary host SIMD register.
    5670  *
    5671  * This may emit code to save register content onto the stack in order to free
    5672  * up a register.
    5673  *
    5674  * @returns The host register number; throws VBox status code on failure,
    5675  *          so no need to check the return value.
    5676  * @param   pReNative       The native recompile state.
    5677  * @param   poff            Pointer to the variable with the code buffer position.
    5678  *                          This will be updated if we need to move a variable from
    5679  *                          register to stack in order to satisfy the request.
    5680  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    5681  *                          registers (@c true, default) or the other way around
    5682  *                          (@c false, for iemNativeSimdRegAllocTmpForGuestSimdReg()).
    5683  */
    5684 DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
    5685 {
    5686     /*
    5687      * Try to find a completely unused register, preferably a call-volatile one.
    5688      */
    5689     uint8_t  idxSimdReg;
    5690     uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
    5691                    & ~pReNative->Core.bmHstSimdRegsWithGstShadow
    5692                    & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
    5693     if (fRegs)
    5694     {
    5695         if (fPreferVolatile)
    5696             idxSimdReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
    5697                                                     ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
    5698         else
    5699             idxSimdReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
    5700                                                     ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
    5701         Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
    5702         Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
    5703         Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
    5704     }
    5705     else
    5706     {
    5707         idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
    5708         AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
    5709         Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
    5710     }
    5711 
    5712     Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
    5713     return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
    5714 }
    5715 
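A hedged sketch of the intended call pattern; iemNativeSimdRegFreeTmp is assumed here as the SIMD counterpart of the iemNativeRegFreeTmp call used for GPRs later in this section:

    /* Sketch: allocate a scratch vector register, emit code with it, then
       release it.  The free call is an assumption, not shown in this diff. */
    uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
    /* ... emit SIMD instructions using idxSimdTmp, advancing off ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);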
    5716 
    5717 /**
    5718  * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
    5719  * registers.
    5720  *
    5721  * @returns The host register number; throws VBox status code on failure,
    5722  *          so no need to check the return value.
    5723  * @param   pReNative       The native recompile state.
    5724  * @param   poff            Pointer to the variable with the code buffer position.
    5725  *                          This will be updated if we need to move a variable from
    5726  *                          register to stack in order to satisfy the request.
    5727  * @param   fRegMask        Mask of acceptable registers.
    5728  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    5729  *                          registers (@c true, default) or the other way around
    5730  *                          (@c false, for iemNativeSimdRegAllocTmpForGuestSimdReg()).
    5731  */
    5732 DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
    5733                                                       bool fPreferVolatile /*= true*/)
    5734 {
    5735     Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
    5736     Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
    5737 
    5738     /*
    5739      * Try to find a completely unused register, preferably a call-volatile one.
    5740      */
    5741     uint8_t  idxSimdReg;
    5742     uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
    5743                    & ~pReNative->Core.bmHstSimdRegsWithGstShadow
    5744                    & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
    5745                    & fRegMask;
    5746     if (fRegs)
    5747     {
    5748         if (fPreferVolatile)
    5749             idxSimdReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
    5750                                                     ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
    5751         else
    5752             idxSimdReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
    5753                                                     ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
    5754         Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
    5755         Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
    5756         Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
    5757     }
    5758     else
    5759     {
    5760         idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
    5761         AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
    5762         Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
    5763     }
    5764 
    5765     Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
    5766     return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
    5767 }
    5768 
    5769 
    5770 /**
    5771  * Sets the indicator for which part of the given SIMD register has valid data loaded.
    5772  *
    5773  * @param   pReNative       The native recompile state.
    5774  * @param   idxHstSimdReg   The host SIMD register to update the state for.
    5775  * @param   enmLoadSz       The load size to set.
    5776  */
    5777 DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
    5778 {
    5779     /* Everything valid already? -> nothing to do. */
    5780     if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
    5781         return;
    5782 
    5783     if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
    5784         pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
    5785     else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
    5786     {
    5787         Assert(   (   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
    5788                    && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
    5789                || (   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
    5790                    && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
    5791         pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
    5792     }
    5793 }
    5794 
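The merge rule implemented above, summarized as a small state table (derived directly from the code and its asserts; no new behaviour):

    /* enmLoaded (before) + enmLoadSz (request) -> enmLoaded (after)
       Invalid            + Low128 or High128   -> Low128 or High128
       Low128             + High128             -> 256  (both halves now valid)
       High128            + Low128              -> 256
       256                + anything            -> 256  (early return above) */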
    5795 
    5796 static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
    5797                                                             uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
    5798 {
    5799     /* Easy case first: either the destination loads the same range the source has already loaded, or the source has loaded everything. */
    5800     if (   pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
    5801         || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
    5802     {
    5803 # ifdef RT_ARCH_ARM64
    5804         /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    5805         Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
    5806 # endif
    5807 
    5808         if (idxHstSimdRegDst != idxHstSimdRegSrc)
    5809         {
    5810             switch (enmLoadSzDst)
    5811             {
    5812                 case kIemNativeGstSimdRegLdStSz_256:
    5813                     off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
    5814                     break;
    5815                 case kIemNativeGstSimdRegLdStSz_Low128:
    5816                     off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
    5817                     break;
    5818                 case kIemNativeGstSimdRegLdStSz_High128:
    5819                     off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
    5820                     break;
    5821                 default:
    5822                     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
    5823             }
    5824 
    5825             iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
    5826         }
    5827     }
    5828     else
    5829     {
    5830         /* Complicated case where the source is currently missing part of the value; to be implemented later. */
    5831         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
    5832     }
    5833 
    5834     return off;
    5835 }
    5836 
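For context, the even-index asserts above reflect how this code models a 256-bit guest value on ARM64; a sketch of the assumed layout:

    /* ARM64 layout assumption for a 256-bit guest SIMD value:
         Qn   (idxHstSimdReg)      holds the low 128 bits  (the XMM part),
         Qn+1 (idxHstSimdReg + 1)  holds the high 128 bits (the YMM upper half);
       hence the even register indices and the "+ 1" in the High128 case. */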
    5837 
    5838 /**
    5839  * Allocates a temporary host SIMD register for keeping a guest
    5840  * SIMD register value.
    5841  *
    5842  * Since we may already have a register holding the guest register value,
    5843  * code will be emitted to do the loading if that's not the case. Code may also
    5844  * be emitted if we have to free up a register to satisfy the request.
    5845  *
    5846  * @returns The host register number; throws VBox status code on failure, so no
    5847  *          need to check the return value.
    5848  * @param   pReNative       The native recompile state.
    5849  * @param   poff            Pointer to the variable with the code buffer
    5850  *                          position. This will be updated if we need to move a
    5851  *                          variable from register to stack in order to satisfy
    5852  *                          the request.
    5853  * @param   enmGstSimdReg   The guest SIMD register that is to be updated.
    5854  * @param   enmIntendedUse  How the caller will be using the host register.
    5855  * @param   fNoVolatileRegs Set if no volatile register allowed, clear if any
    5856  *                          register is okay (default).  The ASSUMPTION here is
    5857  *                          that the caller has already flushed all volatile
    5858  *                          registers, so this is only applied if we allocate a
    5859  *                          new register.
    5860  * @sa      iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
    5861  */
    5862 DECL_HIDDEN_THROW(uint8_t)
    5863 iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
    5864                                         IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
    5865                                         bool fNoVolatileRegs /*= false*/)
    5866 {
    5867     Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
    5868 #if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
    5869     AssertMsg(   pReNative->idxCurCall == 0
    5870               || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
    5871                   ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
    5872                   : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
    5873                   ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
    5874                   : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(  iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
    5875               ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
    5876 #endif
    5877 #if defined(LOG_ENABLED) || defined(VBOX_STRICT)
    5878     static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
    5879 #endif
    5880     uint32_t const fRegMask = !fNoVolatileRegs
    5881                             ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
    5882                             : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
    5883 
    5884     /*
    5885      * First check if the guest register value is already in a host register.
    5886      */
    5887     if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
    5888     {
    5889         uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
    5890         Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
    5891         Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
    5892         Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
    5893 
    5894         /* It's not supposed to be allocated... */
    5895         if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
    5896         {
    5897             /*
    5898              * If the register will trash the guest shadow copy, try find a
    5899              * completely unused register we can use instead.  If that fails,
    5900              * we need to disassociate the host reg from the guest reg.
    5901              */
    5902             /** @todo would be nice to know if preserving the register is in any way helpful. */
    5903             /* If the purpose is calculations, try duplicate the register value as
    5904                we'll be clobbering the shadow. */
    5905             if (   enmIntendedUse == kIemNativeGstRegUse_Calculation
    5906                 && (  ~pReNative->Core.bmHstSimdRegs
    5907                     & ~pReNative->Core.bmHstSimdRegsWithGstShadow
    5908                     & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
    5909             {
    5910                 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
    5911 
    5912                 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
    5913 
    5914                 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
    5915                        g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
    5916                        g_apszIemNativeHstSimdRegNames[idxRegNew]));
    5917                 idxSimdReg = idxRegNew;
    5918             }
    5919             /* If the current register matches the restrictions, go ahead and allocate
    5920                it for the caller. */
    5921             else if (fRegMask & RT_BIT_32(idxSimdReg))
    5922             {
    5923                 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
    5924                 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
    5925                 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    5926                 {
    5927                     if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    5928                         *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
    5929                     else
    5930                         iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
    5931                     Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
    5932                            g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    5933                 }
    5934                 else
    5935                 {
    5936                     iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
    5937                     Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
    5938                            g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
    5939                 }
    5940             }
    5941             /* Otherwise, allocate a register that satisfies the caller and transfer
    5942                the shadowing if compatible with the intended use.  (This basically
    5943                means the caller wants a non-volatile register (RSP push/pop scenario).) */
    5944             else
    5945             {
    5946                 Assert(fNoVolatileRegs);
    5947                 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
    5948                                                                     !fNoVolatileRegs
    5949                                                                  && enmIntendedUse == kIemNativeGstRegUse_Calculation);
    5950                 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
    5951                 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    5952                 {
    5953                     iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
    5954                     Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
    5955                            g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
    5956                            g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    5957                 }
    5958                 else
    5959                     Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
    5960                            g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
    5961                            g_apszIemNativeHstSimdRegNames[idxRegNew]));
    5962                 idxSimdReg = idxRegNew;
    5963             }
    5964         }
    5965         else
    5966         {
    5967             /*
    5968              * Oops. Shadowed guest register already allocated!
    5969              *
    5970              * Allocate a new register, copy the value and, if updating, the
    5971              * guest shadow copy assignment to the new register.
    5972              */
    5973             AssertMsg(   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
    5974                       && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
    5975                       ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
    5976                        idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
    5977 
    5978             /** @todo share register for readonly access. */
    5979             uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
    5980                                                                  enmIntendedUse == kIemNativeGstRegUse_Calculation);
    5981 
    5982             if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    5983                 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
    5984             else
    5985                 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
    5986 
    5987             if (   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
    5988                 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    5989                 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
    5990                        g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
    5991                        g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
    5992             else
    5993             {
    5994                 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
    5995                 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
    5996                        g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
    5997                        g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
    5998             }
    5999             idxSimdReg = idxRegNew;
    6000         }
    6001         Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
    6002 
    6003 #ifdef VBOX_STRICT
    6004         /* Strict builds: Check that the value is correct. */
    6005         if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    6006             *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
    6007 #endif
    6008 
    6009         return idxSimdReg;
    6010     }
    6011 
    6012     /*
    6013      * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
    6014      */
    6015     uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
    6016 
    6017     if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    6018         *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
    6019     else
    6020         iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
    6021 
    6022     if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    6023         iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
    6024 
    6025     Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
    6026            g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    6027 
    6028     return idxRegNew;
    6029 }
    6030 
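A usage sketch under stated assumptions (the guest register index and the read-modify-write pattern are illustrative only):

    /* Sketch: get a host vector register shadowing guest SIMD register 1 for
       an update; a load is only emitted if it is not already shadowed. */
    uint8_t const idxHstSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
                                                                          IEMNATIVEGSTSIMDREG_SIMD(1),
                                                                          kIemNativeGstSimdRegLdStSz_Low128,
                                                                          kIemNativeGstRegUse_ForUpdate);
    /* ... emit code modifying the low 128 bits held in idxHstSimdReg ... */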
    6031 #endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
    6032 
    6033 
    6034 
    6035 /*********************************************************************************************************************************
    6036 *   Code emitters for flushing pending guest register writes and sanity checks                                                   *
    6037 *********************************************************************************************************************************/
     98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING  */
    603899
    6039100/**
    …
    6046107 * RIP updates, since these are the most common ones.
    6047108 */
    6048 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    6049                                                                   IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
     109DECL_INLINE_THROW(uint32_t)
     110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
    6050111{
    6051112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    …
    6068129    return off;
    6069130}
    6070 
    6071 
    6072 /**
    6073  * Flushes any delayed guest register writes.
    6074  *
    6075  * This must be called prior to calling CImpl functions and any helpers that use
    6076  * the guest state (like raising exceptions) and such.
    6077  *
    6078  * @param   fGstShwExcept   Mask of guest register shadows exempt from flushing.
    6079  * @param   fFlushShadows   Whether to also drop the guest SIMD shadow copies.
    6080  */
    6081 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/,
    6082                                                            bool fFlushShadows /*= true*/)
    6083 {
    6084 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    6085     if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
    6086         off = iemNativeEmitPcWriteback(pReNative, off);
    6087 #else
    6088     RT_NOREF(pReNative, fGstShwExcept);
    6089 #endif
    6090 
    6091 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    6092     /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
    6093     for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
    6094     {
    6095         Assert(   (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
    6096                || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
    6097 
    6098         if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
    6099             off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
    6100 
    6101         if (   fFlushShadows
    6102             && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
    6103         {
    6104             uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
    6105 
    6106             iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
    6107             iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
    6108         }
    6109     }
    6110 #else
    6111     RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
    6112 #endif
    6113 
    6114     return off;
    6115 }
    6116 
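A sketch of the exception-mask parameter (hedged; the delayed PC update is the only case this revision special-cases):

    /* Flush all pending writes except the delayed PC update, e.g. when the
       code that follows is known to write the PC itself.  Illustrative only. */
    off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));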
    6117 
    6118 #ifdef VBOX_STRICT
    6119 /**
    6120  * Does internal register allocator sanity checks.
    6121  */
    6122 static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
    6123 {
    6124     /*
    6125      * Iterate host registers building a guest shadowing set.
    6126      */
    6127     uint64_t bmGstRegShadows        = 0;
    6128     uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
    6129     AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
    6130     while (bmHstRegsWithGstShadow)
    6131     {
    6132         unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
    6133         Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    6134         bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    6135 
    6136         uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    6137         AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
    6138         AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
    6139         bmGstRegShadows |= fThisGstRegShadows;
    6140         while (fThisGstRegShadows)
    6141         {
    6142             unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
    6143             fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
    6144             AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
    6145                       ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
    6146                        idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
    6147         }
    6148     }
    6149     AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
    6150               ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
    6151                bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
    6152 
    6153     /*
    6154      * Now the other way around, checking the guest to host index array.
    6155      */
    6156     bmHstRegsWithGstShadow = 0;
    6157     bmGstRegShadows        = pReNative->Core.bmGstRegShadows;
    6158     Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    6159     while (bmGstRegShadows)
    6160     {
    6161         unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
    6162         Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
    6163         bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
    6164 
    6165         uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    6166         AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
    6167         AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
    6168                   ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
    6169                    idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    6170         bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
    6171     }
    6172     AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
    6173               ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
    6174                bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
    6175 }
    6176 #endif
    6177 
    6178 
    6179 /*********************************************************************************************************************************
    6180 *   Code Emitters (larger snippets)                                                                                              *
    6181 *********************************************************************************************************************************/
    6182 
    6183 /**
    6184  * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
    6185  * extending to 64-bit width.
    6186  *
    6187  * @returns New code buffer offset on success; throws VBox status code on failure.
    6188  * @param   pReNative   The native recompile state.
    6189  * @param   off         The current code buffer position.
    6190  * @param   idxHstReg   The host register to load the guest register value into.
    6191  * @param   enmGstReg   The guest register to load.
    6192  *
    6193  * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
    6194  *       that is something the caller needs to do if applicable.
    6195  */
    6196 DECL_HIDDEN_THROW(uint32_t)
    6197 iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
    6198 {
    6199     Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
    6200     Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
    6201 
    6202     switch (g_aGstShadowInfo[enmGstReg].cb)
    6203     {
    6204         case sizeof(uint64_t):
    6205             return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    6206         case sizeof(uint32_t):
    6207             return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    6208         case sizeof(uint16_t):
    6209             return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    6210 #if 0 /* not present in the table. */
    6211         case sizeof(uint8_t):
    6212             return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    6213 #endif
    6214         default:
    6215             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
    6216     }
    6217 }
    6218 
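A minimal sketch mirroring the strict-build use of this emitter further down in this section:

    /* Load the guest PC into the fixed temporary register; note that this does
       not mark the register as a shadow copy (see the note above). */
    off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);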
    6219 
    6220 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    6221 /**
    6222  * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
    6223  *
    6224  * @returns New code buffer offset on success; throws VBox status code on failure.
    6225  * @param   pReNative       The recompiler state.
    6226  * @param   off             The current code buffer position.
    6227  * @param   idxHstSimdReg   The host register to load the guest register value into.
    6228  * @param   enmGstSimdReg   The guest register to load.
    6229  * @param   enmLoadSz       The load size of the register.
    6230  *
    6231  * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
    6232  *       that is something the caller needs to do if applicable.
    6233  */
    6234 DECL_HIDDEN_THROW(uint32_t)
    6235 iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
    6236                                              IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
    6237 {
    6238     Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
    6239 
    6240     iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
    6241     switch (enmLoadSz)
    6242     {
    6243         case kIemNativeGstSimdRegLdStSz_256:
    6244             off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
    6245             return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
    6246         case kIemNativeGstSimdRegLdStSz_Low128:
    6247             return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
    6248         case kIemNativeGstSimdRegLdStSz_High128:
    6249             return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
    6250         default:
    6251             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
    6252     }
    6253 }
    6254 #endif
    6255 
    6256 #ifdef VBOX_STRICT
    6257 /**
    6258  * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
    6259  *
    6260  * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
    6261  *       Trashes EFLAGS on AMD64.
    6262  */
    6263 static uint32_t
    6264 iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
    6265 {
    6266 # ifdef RT_ARCH_AMD64
    6267     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
    6268 
    6269     /* rol reg64, 32 */
    6270     pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
    6271     pbCodeBuf[off++] = 0xc1;
    6272     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    6273     pbCodeBuf[off++] = 32;
    6274 
    6275     /* test reg32, ffffffffh */
    6276     if (idxReg >= 8)
    6277         pbCodeBuf[off++] = X86_OP_REX_B;
    6278     pbCodeBuf[off++] = 0xf7;
    6279     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    6280     pbCodeBuf[off++] = 0xff;
    6281     pbCodeBuf[off++] = 0xff;
    6282     pbCodeBuf[off++] = 0xff;
    6283     pbCodeBuf[off++] = 0xff;
    6284 
    6285     /* je/jz +1 */
    6286     pbCodeBuf[off++] = 0x74;
    6287     pbCodeBuf[off++] = 0x01;
    6288 
    6289     /* int3 */
    6290     pbCodeBuf[off++] = 0xcc;
    6291 
    6292     /* rol reg64, 32 */
    6293     pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
    6294     pbCodeBuf[off++] = 0xc1;
    6295     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    6296     pbCodeBuf[off++] = 32;
    6297 
    6298 # elif defined(RT_ARCH_ARM64)
    6299     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    6300     /* lsr tmp0, reg64, #32 */
    6301     pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
    6302     /* cbz tmp0, +1 */
    6303     pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    6304     /* brk #0x1100 */
    6305     pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
    6306 
    6307 # else
    6308 #  error "Port me!"
    6309 # endif
    6310     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    6311     return off;
    6312 }
    6313 #endif /* VBOX_STRICT */
    6314 
    6315 
    6316 #ifdef VBOX_STRICT
    6317 /**
    6318  * Emitting code that checks that the content of register @a idxReg is the same
    6319  * as what's in the guest register @a enmGstReg, resulting in a breakpoint
    6320  * instruction if that's not the case.
    6321  *
    6322  * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
    6323  *       Trashes EFLAGS on AMD64.
    6324  */
    6325 static uint32_t
    6326 iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
    6327 {
    6328 # ifdef RT_ARCH_AMD64
    6329     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    6330 
    6331     /* cmp reg, [mem] */
    6332     if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
    6333     {
    6334         if (idxReg >= 8)
    6335             pbCodeBuf[off++] = X86_OP_REX_R;
    6336         pbCodeBuf[off++] = 0x38;
    6337     }
    6338     else
    6339     {
    6340         if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
    6341             pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
    6342         else
    6343         {
    6344             if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
    6345                 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    6346             else
    6347                 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
    6348                            IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
    6349             if (idxReg >= 8)
    6350                 pbCodeBuf[off++] = X86_OP_REX_R;
    6351         }
    6352         pbCodeBuf[off++] = 0x39;
    6353     }
    6354     off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
    6355 
    6356     /* je/jz +1 */
    6357     pbCodeBuf[off++] = 0x74;
    6358     pbCodeBuf[off++] = 0x01;
    6359 
    6360     /* int3 */
    6361     pbCodeBuf[off++] = 0xcc;
    6362 
    6363     /* For values smaller than the register size, we must check that the rest
    6364        of the register is all zeros. */
    6365     if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
    6366     {
    6367         /* test reg64, imm32 */
    6368         pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
    6369         pbCodeBuf[off++] = 0xf7;
    6370         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    6371         pbCodeBuf[off++] = 0;
    6372         pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
    6373         pbCodeBuf[off++] = 0xff;
    6374         pbCodeBuf[off++] = 0xff;
    6375 
    6376         /* je/jz +1 */
    6377         pbCodeBuf[off++] = 0x74;
    6378         pbCodeBuf[off++] = 0x01;
    6379 
    6380         /* int3 */
    6381         pbCodeBuf[off++] = 0xcc;
    6382         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    6383     }
    6384     else
    6385     {
    6386         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    6387         if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
    6388             off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
    6389     }
    6390 
    6391 # elif defined(RT_ARCH_ARM64)
    6392     /* mov TMP0, [gstreg] */
    6393     off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
    6394 
    6395     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    6396     /* sub tmp0, tmp0, idxReg */
    6397     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
    6398     /* cbz tmp0, +1 */
    6399     pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    6400     /* brk #0x1000+enmGstReg */
    6401     pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
    6402     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    6403 
    6404 # else
    6405 #  error "Port me!"
    6406 # endif
    6407     return off;
    6408 }
    6409 
    6410 
    6411 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    6412 /**
    6413  * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
    6414  * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
    6415  * instruction if that's not the case.
    6416  *
    6417  * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
    6418  *       Trashes EFLAGS on AMD64.
    6419  */
    6420 static uint32_t
    6421 iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg,
    6422                                     IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
    6423 {
    6424     /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
    6425     if (   (   enmLoadSz == kIemNativeGstSimdRegLdStSz_256
    6426             && (   IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
    6427                 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
    6428         || (   enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
    6429             && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
    6430         || (   enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
    6431             && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
    6432         return off;
    6433 
    6434 #  ifdef RT_ARCH_AMD64
    6435     Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
    6436 
    6437     /* movdqa vectmp0, idxSimdReg */
    6438     off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
    6439 
    6440     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
    6441 
    6442     /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
    6443     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    6444     if (idxSimdReg >= 8)
    6445         pbCodeBuf[off++] = X86_OP_REX_R;
    6446     pbCodeBuf[off++] = 0x0f;
    6447     pbCodeBuf[off++] = 0x38;
    6448     pbCodeBuf[off++] = 0x29;
    6449     off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
    6450 
    6451     /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
    6452     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    6453     pbCodeBuf[off++] =   X86_OP_REX_W
    6454                        | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
    6455                        | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
    6456     pbCodeBuf[off++] = 0x0f;
    6457     pbCodeBuf[off++] = 0x3a;
    6458     pbCodeBuf[off++] = 0x16;
    6459     pbCodeBuf[off++] = 0xeb;
    6460     pbCodeBuf[off++] = 0x00;
    6461 
    6462     /* test tmp0, 0xffffffff. */
    6463     pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
    6464     pbCodeBuf[off++] = 0xf7;
    6465     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
    6466     pbCodeBuf[off++] = 0xff;
    6467     pbCodeBuf[off++] = 0xff;
    6468     pbCodeBuf[off++] = 0xff;
    6469     pbCodeBuf[off++] = 0xff;
    6470 
    6471     /* je/jz +1 */
    6472     pbCodeBuf[off++] = 0x74;
    6473     pbCodeBuf[off++] = 0x01;
    6474 
    6475     /* int3 */
    6476     pbCodeBuf[off++] = 0xcc;
    6477 
    6478     /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
    6479     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    6480     pbCodeBuf[off++] =   X86_OP_REX_W
    6481                        | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
    6482                        | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
    6483     pbCodeBuf[off++] = 0x0f;
    6484     pbCodeBuf[off++] = 0x3a;
    6485     pbCodeBuf[off++] = 0x16;
    6486     pbCodeBuf[off++] = 0xeb;
    6487     pbCodeBuf[off++] = 0x01;
    6488 
    6489     /* test tmp0, 0xffffffff. */
    6490     pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
    6491     pbCodeBuf[off++] = 0xf7;
    6492     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
    6493     pbCodeBuf[off++] = 0xff;
    6494     pbCodeBuf[off++] = 0xff;
    6495     pbCodeBuf[off++] = 0xff;
    6496     pbCodeBuf[off++] = 0xff;
    6497 
    6498     /* je/jz +1 */
    6499     pbCodeBuf[off++] = 0x74;
    6500     pbCodeBuf[off++] = 0x01;
    6501 
    6502     /* int3 */
    6503     pbCodeBuf[off++] = 0xcc;
    6504 
    6505 #  elif defined(RT_ARCH_ARM64)
    6506     /* mov vectmp0, [gstreg] */
    6507     off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
    6508 
    6509     if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
    6510     {
    6511         uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    6512         /* eor vectmp0, vectmp0, idxSimdReg */
    6513         pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
    6514         /* cnt vectmp0, vectmp0 */
    6515         pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
    6516         /* umov tmp0, vectmp0.D[0] */
    6517         pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
    6518                                                     0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
    6519         /* cbz tmp0, +1 */
    6520         pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    6521         /* brk #0x1000 + enmGstSimdReg */
    6522         pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
    6523     }
    6524 
    6525     if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
    6526     {
    6527         uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    6528         /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
    6529         pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
    6530         /* cnt vectmp0 + 1, vectmp0 + 1 */
    6531         pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
    6532         /* umov tmp0, (vectmp0 + 1).D[0] */
    6533         pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
    6534                                                     0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
    6535         /* cbz tmp0, +1 */
    6536         pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    6537         /* brk #0x1000 + enmGstSimdReg */
    6538         pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
    6539     }
    6540 
    6541 #  else
    6542 #   error "Port me!"
    6543 #  endif
    6544 
    6545     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    6546     return off;
    6547 }
    6548 # endif
    6549 #endif /* VBOX_STRICT */
    6550 
    6551 
    6552 #ifdef VBOX_STRICT
    6553 /**
    6554  * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
    6555  * important bits.
    6556  *
    6557  * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
    6558  *       Trashes EFLAGS on AMD64.
    6559  */
    6560 static uint32_t
    6561 iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
    6562 {
    6563     uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
    6564     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
    6565     off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
    6566     off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
    6567 
    6568 # ifdef RT_ARCH_AMD64
    6569     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    6570 
    6571     /* je/jz +1 */
    6572     pbCodeBuf[off++] = 0x74;
    6573     pbCodeBuf[off++] = 0x01;
    6574 
    6575     /* int3 */
    6576     pbCodeBuf[off++] = 0xcc;
    6577 
    6578 # elif defined(RT_ARCH_ARM64)
    6579     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    6580 
    6581     /* b.eq +1 */
    6582     pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
    6583     /* brk #0x2000 */
    6584     pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
    6585 
    6586 # else
    6587 #  error "Port me!"
    6588 # endif
    6589     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    6590 
    6591     iemNativeRegFreeTmp(pReNative, idxRegTmp);
    6592     return off;
    6593 }
    6594 #endif /* VBOX_STRICT */
    6595 
    6596 
    6597 /**
    6598  * Emits a code for checking the return code of a call and rcPassUp, returning
    6599  * from the code if either are non-zero.
    6600  */
    6601 DECL_HIDDEN_THROW(uint32_t)
    6602 iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
    6603 {
    6604 #ifdef RT_ARCH_AMD64
    6605     /*
    6606      * AMD64: eax = call status code.
    6607      */
    6608 
    6609     /* edx = rcPassUp */
    6610     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
    6611 # ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    6612     off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
    6613 # endif
    6614 
    6615     /* edx = eax | rcPassUp */
    6616     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    6617     pbCodeBuf[off++] = 0x0b;                    /* or edx, eax */
    6618     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
    6619     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    6620 
    6621     /* Jump to non-zero status return path. */
    6622     off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
    6623 
    6624     /* done. */
    6625 
    6626 #elif defined(RT_ARCH_ARM64)
    6627     /*
    6628      * ARM64: w0 = call status code.
    6629      */
    6630 # ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    6631     off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
    6632 # endif
    6633     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
    6634 
    6635     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    6636 
    6637     pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
    6638 
    6639     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
    6640     iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
    6641     pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
    6642 
    6643 #else
    6644 # error "port me"
    6645 #endif
    6646     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    6647     RT_NOREF_PV(idxInstr);
    6648     return off;
    6649 }
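
Logically, the generated fragment performs the following test after every helper call, branching to the NonZeroRetOrPassUp label that iemNativeEmitRcFiddling() defines below (a C sketch):

    /* Sketch: the status check emitted after each call; rcCall is eax resp. w0. */
    static bool iemSketchCallNeedsStatusFiddling(PVMCPUCC pVCpu, int32_t rcCall)
    {
        return (rcCall | pVCpu->iem.s.rcPassUp) != 0;
    }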
    6650 
    6651 
    6652 /**
    6653  * Emits code to check if the content of @a idxAddrReg is a canonical address,
    6654  * raising a \#GP(0) if it isn't.
    6655  *
    6656  * @returns New code buffer offset, UINT32_MAX on failure.
    6657  * @param   pReNative       The native recompile state.
    6658  * @param   off             The code buffer offset.
    6659  * @param   idxAddrReg      The host register with the address to check.
    6660  * @param   idxInstr        The current instruction.
    6661  */
    6662 DECL_HIDDEN_THROW(uint32_t)
    6663 iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
    6664 {
    6665     /*
    6666      * Make sure we don't have any outstanding guest register writes as we may
    6667      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
    6668      */
    6669     off = iemNativeRegFlushPendingWrites(pReNative, off);
    6670 
    6671 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    6672     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    6673 #else
    6674     RT_NOREF(idxInstr);
    6675 #endif
    6676 
    6677 #ifdef RT_ARCH_AMD64
    6678     /*
    6679      * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
    6680      *     return raisexcpt();
    6681      * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
    6682      */
    6683     uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    6684 
    6685     off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
    6686     off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
    6687     off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
    6688     off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
    6689     off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    6690 
    6691     iemNativeRegFreeTmp(pReNative, iTmpReg);
    6692 
    6693 #elif defined(RT_ARCH_ARM64)
    6694     /*
    6695      * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
    6696      *     return raisexcpt();
    6697      * ----
    6698      *     mov     x1, 0x800000000000
    6699      *     add     x1, x0, x1
    6700      *     cmp     xzr, x1, lsr 48
    6701      *     b.ne    .Lraisexcpt
    6702      */
    6703     uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    6704 
    6705     off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
    6706     off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
    6707     off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
    6708     off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    6709 
    6710     iemNativeRegFreeTmp(pReNative, iTmpReg);
    6711 
    6712 #else
    6713 # error "Port me"
    6714 #endif
    6715     return off;
    6716 }
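
Both variants test the same predicate: in a canonical 48-bit address, bits 63:47 are all equal, so adding 0x800000000000 folds the two canonical ranges onto the low 48 bits and anything left above bit 47 flags the address as non-canonical (a portable C sketch):

    /* Sketch: the canonical-address predicate both sequences implement. */
    static bool iemSketchIsNonCanonical(uint64_t uAddr)
    {
        return ((uAddr + UINT64_C(0x800000000000)) >> 48) != 0;
    }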
    6717 
    6718 
    6719 /**
    6720  * Emits code to check that the content of @a idxAddrReg is within the limit
    6721  * of CS, raising a \#GP(0) if it isn't.
    6722  *
    6723  * @returns New code buffer offset; throws VBox status code on error.
    6724  * @param   pReNative       The native recompile state.
    6725  * @param   off             The code buffer offset.
    6726  * @param   idxAddrReg      The host register (32-bit) with the address to
    6727  *                          check.
    6728  * @param   idxInstr        The current instruction.
    6729  */
    6730 DECL_HIDDEN_THROW(uint32_t)
    6731 iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    6732                                                       uint8_t idxAddrReg, uint8_t idxInstr)
    6733 {
    6734     /*
    6735      * Make sure we don't have any outstanding guest register writes as we may
    6736      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
    6737      */
    6738     off = iemNativeRegFlushPendingWrites(pReNative, off);
    6739 
    6740 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    6741     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    6742 #else
    6743     RT_NOREF(idxInstr);
    6744 #endif
    6745 
    6746     uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
    6747                                                                 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
    6748                                                                 kIemNativeGstRegUse_ReadOnly);
    6749 
    6750     off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
    6751     off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    6752 
    6753     iemNativeRegFreeTmp(pReNative, idxRegCsLim);
    6754     return off;
    6755 }
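
The emitted code is just an unsigned compare of the 32-bit address against the shadowed CS limit followed by a 'ja' to the RaiseGp0 tail label, i.e. (a sketch, assuming the usual CPUMCTX selector register layout):

    /* Sketch: the limit check the generated code performs. */
    static bool iemSketchIsAboveCsLimit(PVMCPUCC pVCpu, uint32_t uAddr)
    {
        return uAddr > pVCpu->cpum.GstCtx.cs.u32Limit;
    }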
    6756 
    6757 
    6758 /**
    6759  * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
    6760  *
    6761  * @returns The flush mask.
    6762  * @param   fCImpl          The IEM_CIMPL_F_XXX flags.
    6763  * @param   fGstShwFlush    The starting flush mask.
    6764  */
    6765 DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
    6766 {
    6767     if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
    6768         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_CS)
    6769                      |  RT_BIT_64(kIemNativeGstReg_SegBaseFirst  + X86_SREG_CS)
    6770                      |  RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
    6771     if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
    6772         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
    6773                      |  RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_SS)
    6774                      |  RT_BIT_64(kIemNativeGstReg_SegBaseFirst  + X86_SREG_SS)
    6775                      |  RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
    6776     else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
    6777         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
    6778     if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
    6779         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
    6780     return fGstShwFlush;
    6781 }
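
For example, a far branch that also modifies the flags register yields a mask covering the CS selector, base and limit shadows plus EFLAGS (sketch):

    /* Sketch: flush mask for a far branch that also touches RFLAGS. */
    uint64_t const fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(
                                      IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_RFLAGS, 0 /*fGstShwFlush*/);
    /* fGstShwFlush now covers CS.sel/base/limit and kIemNativeGstReg_EFlags. */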
    6782 
    6783 
    6784 /**
    6785  * Emits a call to a CImpl function or something similar.
    6786  */
    6787 DECL_HIDDEN_THROW(uint32_t)
    6788 iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
    6789                        uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
    6790 {
    6791     /* Writeback everything. */
    6792     off = iemNativeRegFlushPendingWrites(pReNative, off);
    6793 
    6794     /*
    6795      * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
    6796      * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
    6797      */
    6798     fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
    6799                                                              fGstShwFlush
    6800                                                              | RT_BIT_64(kIemNativeGstReg_Pc)
    6801                                                              | RT_BIT_64(kIemNativeGstReg_EFlags));
    6802     iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
    6803 
    6804     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
    6805 
    6806     /*
    6807      * Load the parameters.
    6808      */
    6809 #if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
    6810     /* Special-case the hidden VBOXSTRICTRC pointer. */
    6811     off = iemNativeEmitLoadGprFromGpr(  pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    6812     off = iemNativeEmitLoadGprImm64(    pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
    6813     if (cAddParams > 0)
    6814         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
    6815     if (cAddParams > 1)
    6816         off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
    6817     if (cAddParams > 2)
    6818         off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
    6819     off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
    6820 
    6821 #else
    6822     AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
    6823     off = iemNativeEmitLoadGprFromGpr(  pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    6824     off = iemNativeEmitLoadGprImm64(    pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
    6825     if (cAddParams > 0)
    6826         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
    6827     if (cAddParams > 1)
    6828         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
    6829     if (cAddParams > 2)
    6830 # if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
    6831         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
    6832 # else
    6833         off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
    6834 # endif
    6835 #endif
    6836 
    6837     /*
    6838      * Make the call.
    6839      */
    6840     off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
    6841 
    6842 #if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
    6843     off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
    6844 #endif
    6845 
    6846     /*
    6847      * Check the status code.
    6848      */
    6849     return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
    6850 }
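
Stripped of the register shuffling (and of the hidden VBOXSTRICTRC pointer on Windows), the generated code for cAddParams == 3 is equivalent to this sketch; the function-pointer type here is illustrative, not the actual IEM typedef:

    /* Sketch: the call performed by the emitted code (cAddParams == 3). */
    typedef VBOXSTRICTRC (*PFNSKETCHCIMPL3)(PVMCPUCC, uint8_t, uint64_t, uint64_t, uint64_t);
    VBOXSTRICTRC rcStrict = ((PFNSKETCHCIMPL3)pfnCImpl)(pVCpu, cbInstr, uParam0, uParam1, uParam2);
    /* ...followed by the iemNativeEmitCheckCallRetAndPassUp() status check. */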
    6851 
    6852 
    6853 /**
    6854  * Emits a call to a threaded worker function.
    6855  */
    6856 DECL_HIDDEN_THROW(uint32_t)
    6857 iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
    6858 {
    6859     /* We don't know what the threaded function is doing so we must flush all pending writes. */
    6860     off = iemNativeRegFlushPendingWrites(pReNative, off);
    6861 
    6862     iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
    6863     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
    6864 
    6865 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    6866     /* The threaded function may throw / long jmp, so set current instruction
    6867        number if we're counting. */
    6868     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    6869 #endif
    6870 
    6871     uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
    6872 
    6873 #ifdef RT_ARCH_AMD64
    6874     /* Load the parameters and emit the call. */
    6875 # ifdef RT_OS_WINDOWS
    6876 #  ifndef VBOXSTRICTRC_STRICT_ENABLED
    6877     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
    6878     if (cParams > 0)
    6879         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
    6880     if (cParams > 1)
    6881         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
    6882     if (cParams > 2)
    6883         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
    6884 #  else  /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
    6885     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
    6886     if (cParams > 0)
    6887         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
    6888     if (cParams > 1)
    6889         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
    6890     if (cParams > 2)
    6891     {
    6892         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
    6893         off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
    6894     }
    6895     off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
    6896 #  endif /* VBOXSTRICTRC_STRICT_ENABLED */
    6897 # else
    6898     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
    6899     if (cParams > 0)
    6900         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
    6901     if (cParams > 1)
    6902         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
    6903     if (cParams > 2)
    6904         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
    6905 # endif
    6906 
    6907     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
    6908 
    6909 # if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
    6910     off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
    6911 # endif
    6912 
    6913 #elif RT_ARCH_ARM64
    6914     /*
    6915      * ARM64:
    6916      */
    6917     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    6918     if (cParams > 0)
    6919         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
    6920     if (cParams > 1)
    6921         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
    6922     if (cParams > 2)
    6923         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
    6924 
    6925     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
    6926 
    6927 #else
    6928 # error "port me"
    6929 #endif
    6930 
    6931     /*
    6932      * Check the status code.
    6933      */
    6934     off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
    6935 
    6936     return off;
    6937 }
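
In effect, each threaded call entry compiles down to a plain helper call on the parameters stashed in the call entry, followed by the shared status check (sketch):

    /* Sketch: what the emitted code does for a call entry. */
    VBOXSTRICTRC rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
                                                                                pCallEntry->auParams[0],
                                                                                pCallEntry->auParams[1],
                                                                                pCallEntry->auParams[2]);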
    6938 
    6939 #ifdef VBOX_WITH_STATISTICS
    6940 /**
    6941  * Emits code to update the thread call statistics.
    6942  */
    6943 DECL_INLINE_THROW(uint32_t)
    6944 iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
    6945 {
    6946     /*
    6947      * Update threaded function stats.
    6948      */
    6949     uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
    6950     AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
    6951 # if defined(RT_ARCH_ARM64)
    6952     uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
    6953     uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
    6954     off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
    6955     iemNativeRegFreeTmp(pReNative, idxTmp1);
    6956     iemNativeRegFreeTmp(pReNative, idxTmp2);
    6957 # else
    6958     off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
    6959 # endif
    6960     return off;
    6961 }
    6962 #endif /* VBOX_WITH_STATISTICS */
    6963 
    6964 
    6965 /**
    6966  * Emits the code at the CheckBranchMiss label.
    6967  */
    6968 static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    6969 {
    6970     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
    6971     if (idxLabel != UINT32_MAX)
    6972     {
    6973         iemNativeLabelDefine(pReNative, idxLabel, off);
    6974 
    6975         /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
    6976         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    6977         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
    6978 
    6979         /* jump back to the return sequence. */
    6980         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    6981     }
    6982     return off;
    6983 }
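
The emitters for the remaining tail labels below all follow this exact pattern: if the label was referenced somewhere in the TB, define it here, call the matching helper with pVCpu as the only argument, and jump back to the common return sequence (sketch of the generated tail code):

    /* Sketch: the tail code each of these emitters generates.
    CheckBranchMiss:
        eax/w0 = iemNativeHlpCheckBranchMiss(pVCpu);
        jmp    CommonReturn; */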
    6984 
    6985 
    6986 /**
    6987  * Emits the code at the NeedCsLimChecking label.
    6988  */
    6989 static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    6990 {
    6991     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
    6992     if (idxLabel != UINT32_MAX)
    6993     {
    6994         iemNativeLabelDefine(pReNative, idxLabel, off);
    6995 
    6996         /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
    6997         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    6998         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
    6999 
    7000         /* jump back to the return sequence. */
    7001         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    7002     }
    7003     return off;
    7004 }
    7005 
    7006 
    7007 /**
    7008  * Emits the code at the ObsoleteTb label.
    7009  */
    7010 static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    7011 {
    7012     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
    7013     if (idxLabel != UINT32_MAX)
    7014     {
    7015         iemNativeLabelDefine(pReNative, idxLabel, off);
    7016 
    7017         /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
    7018         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    7019         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
    7020 
    7021         /* jump back to the return sequence. */
    7022         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    7023     }
    7024     return off;
    7025 }
    7026 
    7027 
    7028 /**
    7029  * Emits the code at the RaiseGP0 label.
    7030  */
    7031 static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    7032 {
    7033     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
    7034     if (idxLabel != UINT32_MAX)
    7035     {
    7036         iemNativeLabelDefine(pReNative, idxLabel, off);
    7037 
    7038         /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
    7039         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    7040         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
    7041 
    7042         /* jump back to the return sequence. */
    7043         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    7044     }
    7045     return off;
    7046 }
    7047 
    7048 
    7049 /**
    7050  * Emits the code at the RaiseNm label.
    7051  */
    7052 static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    7053 {
    7054     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
    7055     if (idxLabel != UINT32_MAX)
    7056     {
    7057         iemNativeLabelDefine(pReNative, idxLabel, off);
    7058 
    7059         /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
    7060         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    7061         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
    7062 
    7063         /* jump back to the return sequence. */
    7064         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    7065     }
    7066     return off;
    7067 }
    7068 
    7069 
    7070 /**
    7071  * Emits the code at the RaiseUd label.
    7072  */
    7073 static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    7074 {
    7075     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
    7076     if (idxLabel != UINT32_MAX)
    7077     {
    7078         iemNativeLabelDefine(pReNative, idxLabel, off);
    7079 
    7080         /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
    7081         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    7082         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
    7083 
    7084         /* jump back to the return sequence. */
    7085         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    7086     }
    7087     return off;
    7088 }
    7089 
    7090 
    7091 /**
    7092  * Emits the code at the RaiseMf label.
    7093  */
    7094 static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    7095 {
    7096     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
    7097     if (idxLabel != UINT32_MAX)
    7098     {
    7099         iemNativeLabelDefine(pReNative, idxLabel, off);
    7100 
    7101         /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
    7102         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    7103         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
    7104 
    7105         /* jump back to the return sequence. */
    7106         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    7107     }
    7108     return off;
    7109 }
    7110 
    7111 
    7112 /**
    7113  * Emits the code at the RaiseXf label.
    7114  */
    7115 static uint32_t iemNativeEmitRaiseXf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    7116 {
    7117     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseXf);
    7118     if (idxLabel != UINT32_MAX)
    7119     {
    7120         iemNativeLabelDefine(pReNative, idxLabel, off);
    7121 
    7122         /* iemNativeHlpExecRaiseXf(PVMCPUCC pVCpu) */
    7123         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    7124         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseXf);
    7125 
    7126         /* jump back to the return sequence. */
    7127         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    7128     }
    7129     return off;
    7130 }
    7131 
    7132 
    7133 /**
    7134  * Emits the code at the ReturnWithFlags label (returns
    7135  * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
    7136  */
    7137 static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    7138 {
    7139     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
    7140     if (idxLabel != UINT32_MAX)
    7141     {
    7142         iemNativeLabelDefine(pReNative, idxLabel, off);
    7143 
    7144         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
    7145 
    7146         /* jump back to the return sequence. */
    7147         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    7148     }
    7149     return off;
    7150 }
    7151 
    7152 
    7153 /**
    7154  * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
    7155  */
    7156 static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    7157 {
    7158     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
    7159     if (idxLabel != UINT32_MAX)
    7160     {
    7161         iemNativeLabelDefine(pReNative, idxLabel, off);
    7162 
    7163         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
    7164 
    7165         /* jump back to the return sequence. */
    7166         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    7167     }
    7168     return off;
    7169 }
    7170 
    7171 
    7172 /**
    7173  * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
    7174  */
    7175 static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    7176 {
    7177     /*
    7178      * Generate the rc + rcPassUp fiddling code if needed.
    7179      */
    7180     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
    7181     if (idxLabel != UINT32_MAX)
    7182     {
    7183         iemNativeLabelDefine(pReNative, idxLabel, off);
    7184 
    7185         /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
    7186 #ifdef RT_ARCH_AMD64
    7187 # ifdef RT_OS_WINDOWS
    7188 #  ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    7189         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8,  X86_GREG_xCX); /* cl = instruction number */
    7190 #  endif
    7191         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
    7192         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
    7193 # else
    7194         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
    7195         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
    7196 #  ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    7197         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
    7198 #  endif
    7199 # endif
    7200 # ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    7201         off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
    7202 # endif
    7203 
    7204 #else
    7205         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
    7206         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    7207         /* IEMNATIVE_CALL_ARG2_GREG is already set. */
    7208 #endif
    7209 
    7210         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
    7211         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    7212     }
    7213     return off;
    7214 }
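
On all targets the shuffling above just arranges a call to the fiddling helper with the active status code and, when instruction counting is enabled, the instruction number (sketch):

    /* Sketch: what the generated fiddling code does.
       rc = iemNativeHlpExecStatusCodeFiddling(pVCpu, rc, idxInstr);   -- idxInstr is 0 when not counting
       jmp CommonReturn; */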
    7215 
    7216 
    7217 /**
    7218  * Emits a standard epilog.
    7219  */
    7220 static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
    7221 {
    7222     *pidxReturnLabel = UINT32_MAX;
    7223 
    7224     /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
    7225     off = iemNativeRegFlushPendingWrites(pReNative, off);
    7226 
    7227     /*
    7228      * Successful return, so clear the return register (eax, w0).
    7229      */
    7230     off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
    7231 
    7232     /*
    7233      * Define label for common return point.
    7234      */
    7235     uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
    7236     *pidxReturnLabel = idxReturn;
    7237 
    7238     /*
    7239      * Restore registers and return.
    7240      */
    7241 #ifdef RT_ARCH_AMD64
    7242     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
    7243 
    7244     /* Reposition rsp at the r15 restore point. */
    7245     pbCodeBuf[off++] = X86_OP_REX_W;
    7246     pbCodeBuf[off++] = 0x8d;                    /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
    7247     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
    7248     pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
    7249 
    7250     /* Pop non-volatile registers and return */
    7251     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r15 */
    7252     pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
    7253     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r14 */
    7254     pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
    7255     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r13 */
    7256     pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
    7257     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r12 */
    7258     pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
    7259 # ifdef RT_OS_WINDOWS
    7260     pbCodeBuf[off++] = 0x58 + X86_GREG_xDI;     /* pop rdi */
    7261     pbCodeBuf[off++] = 0x58 + X86_GREG_xSI;     /* pop rsi */
    7262 # endif
    7263     pbCodeBuf[off++] = 0x58 + X86_GREG_xBX;     /* pop rbx */
    7264     pbCodeBuf[off++] = 0xc9;                    /* leave */
    7265     pbCodeBuf[off++] = 0xc3;                    /* ret */
    7266     pbCodeBuf[off++] = 0xcc;                    /* int3 poison */
    7267 
    7268 #elif RT_ARCH_ARM64
    7269     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    7270 
    7271     /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
    7272     AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
    7273     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
    7274                                                  ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
    7275                                                  IEMNATIVE_FRAME_VAR_SIZE / 8);
    7276     /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
    7277     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7278                                                  ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
    7279     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7280                                                  ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
    7281     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7282                                                  ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
    7283     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7284                                                  ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
    7285     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7286                                                  ARMV8_A64_REG_BP,  ARMV8_A64_REG_LR,  ARMV8_A64_REG_SP, 10);
    7287     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    7288 
    7289     /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ;  */
    7290     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
    7291     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
    7292                                                      IEMNATIVE_FRAME_SAVE_REG_SIZE);
    7293 
    7294     /* retab / ret */
    7295 # ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
    7296     if (1)
    7297         pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
    7298     else
    7299 # endif
    7300         pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
    7301 
    7302 #else
    7303 # error "port me"
    7304 #endif
    7305     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    7306 
    7307     return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
    7308 }
    7309 
    7310 
    7311 /**
    7312  * Emits a standard prolog.
    7313  */
    7314 static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    7315 {
    7316 #ifdef RT_ARCH_AMD64
    7317     /*
    7318      * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
    7319      * reserving 64 bytes for stack variables plus 4 non-register argument
    7320      * slots.  Fixed register assignment: xBX = pVCpu.
    7321      *
    7322      * Since we always do the same register spilling, we can use the same
    7323      * unwind description for all the code.
    7324      */
    7325     uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    7326     pbCodeBuf[off++] = 0x50 + X86_GREG_xBP;     /* push rbp */
    7327     pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbp, rsp */
    7328     pbCodeBuf[off++] = 0x8b;
    7329     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
    7330     pbCodeBuf[off++] = 0x50 + X86_GREG_xBX;     /* push rbx */
    7331     AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
    7332 # ifdef RT_OS_WINDOWS
    7333     pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbx, rcx ; RBX = pVCpu */
    7334     pbCodeBuf[off++] = 0x8b;
    7335     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
    7336     pbCodeBuf[off++] = 0x50 + X86_GREG_xSI;     /* push rsi */
    7337     pbCodeBuf[off++] = 0x50 + X86_GREG_xDI;     /* push rdi */
    7338 # else
    7339     pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbx, rdi ; RBX = pVCpu */
    7340     pbCodeBuf[off++] = 0x8b;
    7341     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
    7342 # endif
    7343     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r12 */
    7344     pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
    7345     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r13 */
    7346     pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
    7347     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r14 */
    7348     pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
    7349     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r15 */
    7350     pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
    7351 
    7352 # ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
    7353     /* Save the frame pointer. */
    7354     off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
    7355 # endif
    7356 
    7357     off = iemNativeEmitSubGprImm(pReNative, off,    /* sub rsp, byte 28h */
    7358                                  X86_GREG_xSP,
    7359                                    IEMNATIVE_FRAME_ALIGN_SIZE
    7360                                  + IEMNATIVE_FRAME_VAR_SIZE
    7361                                  + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
    7362                                  + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
    7363     AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
    7364     AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
    7365     AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
    7366 
    7367 #elif RT_ARCH_ARM64
    7368     /*
    7369      * We set up a stack frame exactly like on x86, only we have to push the
    7370      * return address ourselves here.  We save all non-volatile registers.
    7371      */
    7372     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
    7373 
    7374 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've
    7375                       * been unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.
    7376                       * It's definitely the dwarf stepping code, but until that's found it's very tedious to figure out
    7377                       * whether it's in any way conditional, so just emit these instructions now and hope for the best... */
    7378     /* pacibsp */
    7379     pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
    7380 # endif
    7381 
    7382     /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
    7383     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
    7384     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
    7385                                                  ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
    7386                                                  -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
    7387     /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
    7388     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7389                                                  ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
    7390     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7391                                                  ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
    7392     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7393                                                  ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
    7394     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7395                                                  ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
    7396     /* Save the BP and LR (ret address) registers at the top of the frame. */
    7397     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7398                                                  ARMV8_A64_REG_BP,  ARMV8_A64_REG_LR,  ARMV8_A64_REG_SP, 10);
    7399     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    7400     /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
    7401     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
    7402                                                      ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
    7403 
    7404     /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ;  Allocate the variable area from SP. */
    7405     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
    7406 
    7407     /* mov r28, r0  */
    7408     off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
    7409     /* mov r27, r1  */
    7410     off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
    7411 
    7412 # ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
    7413     /* Save the frame pointer. */
    7414     off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
    7415                                            ARMV8_A64_REG_X2);
    7416 # endif
    7417 
    7418 #else
    7419 # error "port me"
    7420 #endif
    7421     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    7422     return off;
    7423 }
    7424 
    7425131
    7426132
     
    7450156*   Native Emitter Support.                                                                                                      *
    7451157*********************************************************************************************************************************/
    7452 
    7453158
    7454159#define IEM_MC_NATIVE_IF(a_fSupportedHosts)     if (RT_ARCH_VAL & (a_fSupportedHosts)) {
     
    90521757*   Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++                                                            *
    90531758*********************************************************************************************************************************/
    9054 /** Number of hidden arguments for CIMPL calls.
    9055  * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
    9056 #if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
    9057 # define IEM_CIMPL_HIDDEN_ARGS 3
    9058 #else
    9059 # define IEM_CIMPL_HIDDEN_ARGS 2
    9060 #endif
    90611759
    90621760#define IEM_MC_NOREF(a_Name) \
     
    90771775#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
    90781776    uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
    9079 
    9080 
    9081 /**
    9082  * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
    9083  */
    9084 DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
    9085 {
    9086     if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
    9087         return IEM_CIMPL_HIDDEN_ARGS;
    9088     if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
    9089         return 1;
    9090     return 0;
    9091 }
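
So a CImpl call sees its visible IEM_MC arguments shifted up behind the fixed ones; roughly (a sketch of the slot layout, the Windows/AMD64 strict case being the three-argument one):

    /* Sketch: argument numbering for a CImpl call.
       arg 0 .. IEM_CIMPL_HIDDEN_ARGS-1 : pVCpu, cbInstr (+ hidden rcStrict ptr on Windows/AMD64 strict)
       arg IEM_CIMPL_HIDDEN_ARGS + n    : the n-th IEM_MC_ARG_XXX declared by the instruction body. */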
    9092 
    9093 
    9094 /**
    9095  * Internal work that allocates a variable with kind set to
    9096  * kIemNativeVarKind_Invalid and no current stack allocation.
    9097  *
    9098  * The kind will either be set by the caller or later when the variable is first
    9099  * assigned a value.
    9100  *
    9101  * @returns Unpacked index.
    9102  * @internal
    9103  */
    9104 static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
    9105 {
    9106     Assert(cbType > 0 && cbType <= 64);
    9107     unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
    9108     AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
    9109     pReNative->Core.bmVars |= RT_BIT_32(idxVar);
    9110     pReNative->Core.aVars[idxVar].enmKind        = kIemNativeVarKind_Invalid;
    9111     pReNative->Core.aVars[idxVar].cbVar          = cbType;
    9112     pReNative->Core.aVars[idxVar].idxStackSlot   = UINT8_MAX;
    9113     pReNative->Core.aVars[idxVar].idxReg         = UINT8_MAX;
    9114     pReNative->Core.aVars[idxVar].uArgNo         = UINT8_MAX;
    9115     pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
    9116     pReNative->Core.aVars[idxVar].enmGstReg      = kIemNativeGstReg_End;
    9117     pReNative->Core.aVars[idxVar].fRegAcquired   = false;
    9118     pReNative->Core.aVars[idxVar].u.uValue       = 0;
    9119     return idxVar;
    9120 }
    9121 
    9122 
    9123 /**
    9124  * Internal work that allocates an argument variable w/o setting enmKind.
    9125  *
    9126  * @returns Unpacked index.
    9127  * @internal
    9128  */
    9129 static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
    9130 {
    9131     iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
    9132     AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
    9133     AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
    9134 
    9135     uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
    9136     pReNative->Core.aidxArgVars[iArgNo]  = idxVar; /* (unpacked) */
    9137     pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
    9138     return idxVar;
    9139 }
    9140 
    9141 
    9142 /**
    9143  * Gets the stack slot for a stack variable, allocating one if necessary.
    9144  *
    9145  * Calling this function implies that the stack slot will contain a valid
    9146  * variable value.  The caller deals with any register currently assigned to the
    9147  * variable, typically by spilling it into the stack slot.
    9148  *
    9149  * @returns The stack slot number.
    9150  * @param   pReNative   The recompiler state.
    9151  * @param   idxVar      The variable.
    9152  * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS
    9153  */
    9154 DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    9155 {
    9156     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    9157     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    9158     Assert(pVar->enmKind == kIemNativeVarKind_Stack);
    9159 
    9160     /* Already got a slot? */
    9161     uint8_t const idxStackSlot = pVar->idxStackSlot;
    9162     if (idxStackSlot != UINT8_MAX)
    9163     {
    9164         Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
    9165         return idxStackSlot;
    9166     }
    9167 
    9168     /*
    9169      * A single slot is easy to allocate.
    9170      * Allocate them from the top end, closest to BP, to reduce the displacement.
    9171      */
    9172     if (pVar->cbVar <= sizeof(uint64_t))
    9173     {
    9174         unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
    9175         AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    9176         pReNative->Core.bmStack |= RT_BIT_32(iSlot);
    9177         pVar->idxStackSlot       = (uint8_t)iSlot;
    9178         Log11(("iemNativeVarSetKindToStack: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
    9179         return (uint8_t)iSlot;
    9180     }
    9181 
    9182     /*
    9183      * We need more than one stack slot.
    9184      *
    9185      * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
    9186      */
    9187     AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
    9188     Assert(pVar->cbVar <= 64);
    9189     uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
    9190     uint32_t       fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
    9191     uint32_t       bmStack       = ~pReNative->Core.bmStack;
    9192     while (bmStack != UINT32_MAX)
    9193     {
    9194 /** @todo allocate from the top to reduce BP displacement. */
    9195         unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
    9196         AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    9197         if (!(iSlot & fBitAlignMask))
    9198         {
    9199             if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
    9200             {
    9201                 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
    9202                 pVar->idxStackSlot       = (uint8_t)iSlot;
    9203                 Log11(("iemNativeVarSetKindToStack: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
    9204                        idxVar, iSlot, fBitAllocMask, pVar->cbVar));
    9205                 return (uint8_t)iSlot;
    9206             }
    9207         }
    9208         bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
    9209     }
    9210     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    9211 }
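
Worked example for a 32-byte variable: ASMBitLastSetU32(32) is 6, so fBitAlignMask = RT_BIT_32(6 - 4) - 1 = 3 and fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = 0xf; the loop thus searches for four consecutive free slots starting at a slot index that is a multiple of four:

    /* Sketch: mask derivation for cbVar == 32. */
    uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1;  /* == 3   */
    uint32_t const fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1;             /* == 0xf */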
    9212 
    9213 
    9214 /**
    9215  * Changes the variable to a stack variable.
    9216  *
    9217  * Currently this is only possible to do the first time the variable is used;
    9218  * switching later can be implemented but hasn't been done.
    9219  *
    9220  * @param   pReNative   The recompiler state.
    9221  * @param   idxVar      The variable.
    9222  * @throws  VERR_IEM_VAR_IPE_2
    9223  */
    9224 static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    9225 {
    9226     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    9227     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    9228     if (pVar->enmKind != kIemNativeVarKind_Stack)
    9229     {
    9230         /* We could in theory transition from immediate to stack as well, but it
    9231            would involve the caller doing work storing the value on the stack. So,
    9232            till that's required we only allow transition from invalid. */
    9233         AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    9234         AssertStmt(pVar->idxReg  == UINT8_MAX,                 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    9235         pVar->enmKind = kIemNativeVarKind_Stack;
    9236 
    9237         /* Note! We don't allocate a stack slot here, that's only done when a
    9238                  slot is actually needed to hold a variable value. */
    9239     }
    9240 }
    9241 
    9242 
    9243 /**
    9244  * Sets the variable to a constant value.
    9245  *
    9246  * This does not require stack storage as we know the value and can always
    9247  * reload it, unless of course it's referenced.
    9248  *
    9249  * @param   pReNative   The recompiler state.
    9250  * @param   idxVar      The variable.
    9251  * @param   uValue      The immediate value.
    9252  * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
    9253  */
    9254 static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
    9255 {
    9256     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    9257     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    9258     if (pVar->enmKind != kIemNativeVarKind_Immediate)
    9259     {
    9260         /* Only simple transitions for now. */
    9261         AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    9262         pVar->enmKind = kIemNativeVarKind_Immediate;
    9263     }
    9264     AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    9265 
    9266     pVar->u.uValue = uValue;
    9267     AssertMsg(   pVar->cbVar >= sizeof(uint64_t)
    9268               || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
    9269               ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
    9270 }
    9271 
    9272 
    9273 /**
    9274  * Sets the variable to a reference (pointer) to @a idxOtherVar.
    9275  *
    9276  * This does not require stack storage as we know the value and can always
    9277  * reload it.  Loading is postponed till needed.
    9278  *
    9279  * @param   pReNative   The recompiler state.
    9280  * @param   idxVar      The variable. Unpacked.
    9281  * @param   idxOtherVar The variable to take the (stack) address of. Unpacked.
    9282  *
    9283  * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
    9284  * @internal
    9285  */
    9286 static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
    9287 {
    9288     Assert(idxVar      < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
    9289     Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
    9290 
    9291     if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
    9292     {
    9293         /* Only simple transitions for now. */
    9294         AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
    9295                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    9296         pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
    9297     }
    9298     AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    9299 
    9300     pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
    9301 
    9302     /* Update the other variable, ensure it's a stack variable. */
    9303     /** @todo handle variables with const values... that'll go boom now. */
    9304     pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
    9305     iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
    9306 }
    9307 
    9308 
    9309 /**
    9310  * Sets the variable to a reference (pointer) to a guest register reference.
    9311  *
    9312  * This does not require stack storage as we know the value and can always
    9313  * reload it.  Loading is postponed till needed.
    9314  *
    9315  * @param   pReNative       The recompiler state.
    9316  * @param   idxVar          The variable.
    9317  * @param   enmRegClass     The class guest registers to reference.
    9318  * @param   idxReg          The register within @a enmRegClass to reference.
    9319  *
    9320  * @throws  VERR_IEM_VAR_IPE_2
    9321  */
    9322 static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
    9323                                            IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
    9324 {
    9325     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    9326     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    9327 
    9328     if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
    9329     {
    9330         /* Only simple transitions for now. */
    9331         AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    9332         pVar->enmKind = kIemNativeVarKind_GstRegRef;
    9333     }
    9334     AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    9335 
    9336     pVar->u.GstRegRef.enmClass = enmRegClass;
    9337     pVar->u.GstRegRef.idx      = idxReg;
    9338 }
    9339 
    9340 
    9341 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
    9342 {
    9343     return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
    9344 }
    9345 
    9346 
    9347 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
    9348 {
    9349     uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
    9350 
    9351     /* Since we're using a generic uint64_t value type, we must truncate it if
    9352        the variable is smaller, otherwise we may end up with too large a value
    9353        when scaling up an imm8 w/ sign-extension.
    9354 
    9355        This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
    9356        in the bios, bx=1) when running on arm, because clang expects 16-bit
    9357        register parameters to have bits 16 and up set to zero.  Instead of
    9358        setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
    9359        CF value in the result.  */
    9360     switch (cbType)
    9361     {
    9362         case sizeof(uint8_t):   uValue &= UINT64_C(0xff); break;
    9363         case sizeof(uint16_t):  uValue &= UINT64_C(0xffff); break;
    9364         case sizeof(uint32_t):  uValue &= UINT64_C(0xffffffff); break;
    9365     }
    9366     iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
    9367     return idxVar;
    9368 }
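
A minimal sketch of the problem the masking above avoids, using made-up values (not code from the changeset): sign-extending a 16-bit immediate yields a 64-bit all-ones value, and the mask cuts it back down so bits 16 and up are clear.

    uint64_t uValue = (uint64_t)(int64_t)(int16_t)0xffff; /* sign-extended: 0xffffffffffffffff */
    uValue &= UINT64_C(0xffff);                           /* after masking: 0x000000000000ffff */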
    9369 
    9370 
    9371 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
    9372 {
    9373     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
    9374     idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
    9375     AssertStmt(   idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
    9376                && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
    9377                && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
    9378                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
    9379 
    9380     uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
    9381     iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
    9382     return idxArgVar;
    9383 }
    9384 
    9385 
    9386 DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
    9387 {
    9388     uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
    9389     /* Don't set to stack now, leave that to the first use; for instance
    9390        IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
    9391     return idxVar;
    9392 }
    9393 
    9394 
    9395 DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
    9396 {
    9397     uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
    9398 
    9399     /* Since we're using a generic uint64_t value type, we must truncate it if
    9400        the variable is smaller, otherwise we may end up with too large a value
    9401        when scaling up an imm8 w/ sign-extension. */
    9402     switch (cbType)
    9403     {
    9404         case sizeof(uint8_t):   uValue &= UINT64_C(0xff); break;
    9405         case sizeof(uint16_t):  uValue &= UINT64_C(0xffff); break;
    9406         case sizeof(uint32_t):  uValue &= UINT64_C(0xffffffff); break;
    9407     }
    9408     iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
    9409     return idxVar;
    9410 }
    9411 
    9412 
    9413 /**
    9414  * Makes sure variable @a idxVar has a register assigned to it and that it stays
    9415  * fixed till we call iemNativeVarRegisterRelease.
    9416  *
    9417  * @returns The host register number.
    9418  * @param   pReNative   The recompiler state.
    9419  * @param   idxVar      The variable.
    9420  * @param   poff        Pointer to the instruction buffer offset.
    9421  *                      In case a register needs to be freed up or the value
    9422  *                      loaded off the stack.
    9423  * @param  fInitialized Set if the variable must already have been initialized.
    9424  *                      Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
    9425  *                      the case.
    9426  * @param  idxRegPref   Preferred register number or UINT8_MAX.
    9427  */
    9428 DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
    9429                                                        bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
    9430 {
    9431     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    9432     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    9433     Assert(pVar->cbVar <= 8);
    9434     Assert(!pVar->fRegAcquired);
    9435 
    9436     uint8_t idxReg = pVar->idxReg;
    9437     if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    9438     {
    9439         Assert(   pVar->enmKind > kIemNativeVarKind_Invalid
    9440                && pVar->enmKind < kIemNativeVarKind_End);
    9441         pVar->fRegAcquired = true;
    9442         return idxReg;
    9443     }
    9444 
    9445     /*
    9446      * If the kind of variable has not yet been set, default to 'stack'.
    9447      */
    9448     Assert(   pVar->enmKind >= kIemNativeVarKind_Invalid
    9449            && pVar->enmKind < kIemNativeVarKind_End);
    9450     if (pVar->enmKind == kIemNativeVarKind_Invalid)
    9451         iemNativeVarSetKindToStack(pReNative, idxVar);
    9452 
    9453     /*
    9454      * We have to allocate a register for the variable, even if it's a stack one,
    9455      * as we don't know if there are modifications being made to it before it's
    9456      * finalized (todo: analyze and insert hints about that?).
    9457      *
    9458      * If we can, we try to get the correct register for argument variables.  This
    9459      * is assuming that most argument variables are fetched as close as possible
    9460      * to the actual call, so that there aren't any interfering hidden calls
    9461      * (memory accesses, etc.) in between.
    9462      *
    9463      * If we cannot, or it's a regular (non-argument) variable, we make sure no
    9464      * argument registers that will be used by this MC block are allocated here,
    9465      * and we always prefer non-volatile registers to avoid needing to spill
    9466      * stuff for internal calls.
    9467      */
    9468     /** @todo Detect too-early argument value fetches and have the python script
    9469      * warn about hidden calls causing less optimal code to be generated. */
    9470 
    9471     uint8_t const uArgNo = pVar->uArgNo;
    9472     if (   uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
    9473         && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
    9474     {
    9475         idxReg = g_aidxIemNativeCallRegs[uArgNo];
    9476         iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    9477         Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
    9478     }
    9479     else if (   idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
    9480              || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
    9481     {
    9482         uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
    9483         uint32_t const fRegs        = ~pReNative->Core.bmHstRegs
    9484                                     & ~pReNative->Core.bmHstRegsWithGstShadow
    9485                                     & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
    9486                                     & fNotArgsMask;
    9487         if (fRegs)
    9488         {
    9489             /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
    9490             idxReg = (uint8_t)ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    9491                                                ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    9492             Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    9493             Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    9494             Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
    9495         }
    9496         else
    9497         {
    9498             idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
    9499                                                IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
    9500             AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
    9501             Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
    9502         }
    9503     }
    9504     else
    9505     {
    9506         idxReg = idxRegPref;
    9507         iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    9508         Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
    9509     }
    9510     iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
    9511     pVar->idxReg = idxReg;
    9512 
    9513     /*
    9514      * Load it off the stack if we've got a stack slot.
    9515      */
    9516     uint8_t const idxStackSlot = pVar->idxStackSlot;
    9517     if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
    9518     {
    9519         Assert(fInitialized);
    9520         int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
    9521         switch (pVar->cbVar)
    9522         {
    9523             case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
    9524             case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
    9525             case 3: AssertFailed(); RT_FALL_THRU();
    9526             case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
    9527             default: AssertFailed(); RT_FALL_THRU();
    9528             case 8: *poff = iemNativeEmitLoadGprByBp(   pReNative, *poff, idxReg, offDispBp); break;
    9529         }
    9530     }
    9531     else
    9532     {
    9533         Assert(idxStackSlot == UINT8_MAX);
    9534         AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    9535     }
    9536     pVar->fRegAcquired = true;
    9537     return idxReg;
    9538 }
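
A minimal caller sketch for the function above (hypothetical emitter code, assuming the iemNativeVarAlloc and iemNativeVarRegisterRelease helpers from this file):

    uint8_t const idxVar = iemNativeVarAlloc(pReNative, sizeof(uint64_t));
    uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
    /* ... emit code using idxReg; the register stays fixed for the variable ... */
    iemNativeVarRegisterRelease(pReNative, idxVar);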
    9539 
    9540 
    9541 /**
    9542  * The value of variable @a idxVar will be written in full to the @a enmGstReg
    9543  * guest register.
    9544  *
    9545  * This function makes sure there is a register for it and sets it to be the
    9546  * current shadow copy of @a enmGstReg.
    9547  *
    9548  * @returns The host register number.
    9549  * @param   pReNative   The recompiler state.
    9550  * @param   idxVar      The variable.
    9551  * @param   enmGstReg   The guest register this variable will be written to
    9552  *                      after this call.
    9553  * @param   poff        Pointer to the instruction buffer offset.
    9554  *                      In case a register needs to be freed up or if the
    9555  *                      variable content needs to be loaded off the stack.
    9556  *
    9557  * @note    We DO NOT expect @a idxVar to be an argument variable, because
    9558  *          this function can only be used in the commit stage of an
    9559  *          instruction.
    9560  */
    9561 DECL_HIDDEN_THROW(uint8_t)
    9562 iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
    9563 {
    9564     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    9565     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    9566     Assert(!pVar->fRegAcquired);
    9567     AssertMsgStmt(   pVar->cbVar <= 8
    9568                   && (   pVar->enmKind == kIemNativeVarKind_Immediate
    9569                       || pVar->enmKind == kIemNativeVarKind_Stack),
    9570                   ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
    9571                    pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
    9572                   IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
    9573 
    9574     /*
    9575      * This shouldn't ever be used for arguments, unless it's in a weird else
    9576      * branch that doesn't do any calling and even then it's questionable.
    9577      *
    9578      * However, in case someone writes crazy wrong MC code and does register
    9579      * updates before making calls, just use the regular register allocator to
    9580      * ensure we get a register suitable for the intended argument number.
    9581      */
    9582     AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
    9583 
    9584     /*
    9585      * If there is already a register for the variable, we transfer/set the
    9586      * guest shadow copy assignment to it.
    9587      */
    9588     uint8_t idxReg = pVar->idxReg;
    9589     if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    9590     {
    9591         if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    9592         {
    9593             uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
    9594             iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
    9595             Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
    9596                    g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
    9597         }
    9598         else
    9599         {
    9600             iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
    9601             Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
    9602                    g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
    9603         }
    9604         /** @todo figure this one out. We need some way of making sure the register isn't
    9605          * modified after this point, just in case we start writing crappy MC code. */
    9606         pVar->enmGstReg    = enmGstReg;
    9607         pVar->fRegAcquired = true;
    9608         return idxReg;
    9609     }
    9610     Assert(pVar->uArgNo == UINT8_MAX);
    9611 
    9612     /*
    9613      * Because this is supposed to be the commit stage, we just tag along with the
    9614      * temporary register allocator and upgrade it to a variable register.
    9615      */
    9616     idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
    9617     Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
    9618     Assert(pReNative->Core.aHstRegs[idxReg].idxVar  == UINT8_MAX);
    9619     pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
    9620     pReNative->Core.aHstRegs[idxReg].idxVar  = idxVar;
    9621     pVar->idxReg                             = idxReg;
    9622 
    9623     /*
    9624      * Now we need to load the register value.
    9625      */
    9626     if (pVar->enmKind == kIemNativeVarKind_Immediate)
    9627         *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
    9628     else
    9629     {
    9630         uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    9631         int32_t const offDispBp    = iemNativeStackCalcBpDisp(idxStackSlot);
    9632         switch (pVar->cbVar)
    9633         {
    9634             case sizeof(uint64_t):
    9635                 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
    9636                 break;
    9637             case sizeof(uint32_t):
    9638                 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
    9639                 break;
    9640             case sizeof(uint16_t):
    9641                 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
    9642                 break;
    9643             case sizeof(uint8_t):
    9644                 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
    9645                 break;
    9646             default:
    9647                 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
    9648         }
    9649     }
    9650 
    9651     pVar->fRegAcquired = true;
    9652     return idxReg;
    9653 }
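
A sketch of typical commit-stage usage (hypothetical caller; idxVarDst and enmGstReg are placeholders for the destination variable and the guest register enum value):

    uint8_t const idxReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarDst, enmGstReg, &off);
    /* ... emit the result into idxReg; it is now the current shadow copy of enmGstReg ... */
    iemNativeVarRegisterRelease(pReNative, idxVarDst);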
    96541777
    96551778
     
    97001823    pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
    97011824    return idxReg;
    9702 }
    9703 
    9704 
    9705 /**
    9706  * Emit code to save volatile registers prior to a call to a helper (TLB miss).
    9707  *
    9708  * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
    9709  * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
    9710  * requirement of flushing anything in volatile host registers when making a
    9711  * call.
    9712  *
    9713  * @returns New @a off value.
    9714  * @param   pReNative           The recompiler state.
    9715  * @param   off                 The code buffer position.
    9716  * @param   fHstRegsNotToSave   Set of registers not to save & restore.
    9717  */
    9718 DECL_HIDDEN_THROW(uint32_t)
    9719 iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
    9720 {
    9721     uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
    9722     if (fHstRegs)
    9723     {
    9724         do
    9725         {
    9726             unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    9727             fHstRegs &= ~RT_BIT_32(idxHstReg);
    9728 
    9729             if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
    9730             {
    9731                 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
    9732                 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    9733                 AssertStmt(   IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
    9734                            && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
    9735                            && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
    9736                            IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_12));
    9737                 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
    9738                 {
    9739                     case kIemNativeVarKind_Stack:
    9740                     {
    9741                         /* Temporarily spill the variable register. */
    9742                         uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    9743                         Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
    9744                                idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    9745                         off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
    9746                         continue;
    9747                     }
    9748 
    9749                     case kIemNativeVarKind_Immediate:
    9750                     case kIemNativeVarKind_VarRef:
    9751                     case kIemNativeVarKind_GstRegRef:
    9752                         /* It is weird to have any of these loaded at this point. */
    9753                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_13));
    9754                         continue;
    9755 
    9756                     case kIemNativeVarKind_End:
    9757                     case kIemNativeVarKind_Invalid:
    9758                         break;
    9759                 }
    9760                 AssertFailed();
    9761             }
    9762             else
    9763             {
    9764                 /*
    9765                  * Allocate a temporary stack slot and spill the register to it.
    9766                  */
    9767                 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
    9768                 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
    9769                            IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    9770                 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
    9771                 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
    9772                 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
    9773                        idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    9774                 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
    9775             }
    9776         } while (fHstRegs);
    9777     }
    9778     return off;
    9779 }
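
A sketch of the intended pairing around a helper call, per the doc comment above (hypothetical surrounding code; see also iemNativeVarRestoreVolatileRegsPostHlpCall below):

    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    /* ... load helper arguments and emit the call ... */
    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);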
    9780 
    9781 
    9782 /**
    9783  * Emit code to restore volatile registers after a call to a helper.
    9784  *
    9785  * @returns New @a off value.
    9786  * @param   pReNative           The recompiler state.
    9787  * @param   off                 The code buffer position.
    9788  * @param   fHstRegsNotToSave   Set of registers not to save & restore.
    9789  * @see     iemNativeVarSaveVolatileRegsPreHlpCall(),
    9790  *          iemNativeRegRestoreGuestShadowsInVolatileRegs()
    9791  */
    9792 DECL_HIDDEN_THROW(uint32_t)
    9793 iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
    9794 {
    9795     uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
    9796     if (fHstRegs)
    9797     {
    9798         do
    9799         {
    9800             unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    9801             fHstRegs &= ~RT_BIT_32(idxHstReg);
    9802 
    9803             if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
    9804             {
    9805                 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
    9806                 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    9807                 AssertStmt(   IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
    9808                            && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
    9809                            && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
    9810                            IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_12));
    9811                 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
    9812                 {
    9813                     case kIemNativeVarKind_Stack:
    9814                     {
    9815                         /* Unspill the variable register. */
    9816                         uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    9817                         Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
    9818                                idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    9819                         off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
    9820                         continue;
    9821                     }
    9822 
    9823                     case kIemNativeVarKind_Immediate:
    9824                     case kIemNativeVarKind_VarRef:
    9825                     case kIemNativeVarKind_GstRegRef:
    9826                         /* It is weird to have any of these loaded at this point. */
    9827                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_13));
    9828                         continue;
    9829 
    9830                     case kIemNativeVarKind_End:
    9831                     case kIemNativeVarKind_Invalid:
    9832                         break;
    9833                 }
    9834                 AssertFailed();
    9835             }
    9836             else
    9837             {
    9838                 /*
    9839                  * Restore from temporary stack slot.
    9840                  */
    9841                 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
    9842                 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
    9843                 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
    9844                 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
    9845 
    9846                 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
    9847             }
    9848         } while (fHstRegs);
    9849     }
    9850     return off;
    9851 }
    9852 
    9853 
    9854 /**
    9855  * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
    9856  *
    9857  * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
    9858  *
    9859  * ASSUMES that @a idxVar is valid and unpacked.
    9860  */
    9861 DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    9862 {
    9863     Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
    9864     uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
    9865     if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
    9866     {
    9867         uint8_t const  cbVar      = pReNative->Core.aVars[idxVar].cbVar;
    9868         uint8_t const  cSlots     = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
    9869         uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
    9870         Assert(cSlots > 0);
    9871         Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
    9872         Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
    9873                idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
    9874         pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
    9875         pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
    9876     }
    9877     else
    9878         Assert(idxStackSlot == UINT8_MAX);
    9879 }
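
A worked example of the slot mask arithmetic above, with made-up numbers:

    /* cbVar = 16, idxStackSlot = 4:
         cSlots     = (16 + sizeof(uint64_t) - 1) / sizeof(uint64_t) = 2
         fAllocMask = RT_BIT_32(2) - 1                               = 0x3
         bmStack   &= ~(0x3 << 4)   - i.e. slots 4 and 5 are freed. */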
    9880 
    9881 
    9882 /**
    9883  * Worker that frees a single variable.
    9884  *
    9885  * ASSUMES that @a idxVar is valid and unpacked.
    9886  */
    9887 DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    9888 {
    9889     Assert(   pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid  /* Including invalid as we may have unused */
    9890            && pReNative->Core.aVars[idxVar].enmKind <  kIemNativeVarKind_End);    /* variables in conditional branches. */
    9891     Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
    9892 
    9893     /* Free the host register first if any assigned. */
    9894     uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    9895     if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    9896     {
    9897         Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
    9898         pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
    9899         pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    9900     }
    9901 
    9902     /* Free argument mapping. */
    9903     uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
    9904     if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
    9905         pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
    9906 
    9907     /* Free the stack slots. */
    9908     iemNativeVarFreeStackSlots(pReNative, idxVar);
    9909 
    9910     /* Free the actual variable. */
    9911     pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
    9912     pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
    9913 }
    9914 
    9915 
    9916 /**
    9917  * Worker for iemNativeVarFreeAll that's called when there is anything to do.
    9918  */
    9919 DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
    9920 {
    9921     while (bmVars != 0)
    9922     {
    9923         uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
    9924         bmVars &= ~RT_BIT_32(idxVar);
    9925 
    9926 #if 1 /** @todo optimize by simplifying this later... */
    9927         iemNativeVarFreeOneWorker(pReNative, idxVar);
    9928 #else
    9929         /* Only need to free the host register, the rest is done as bulk updates below. */
    9930         uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    9931         if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    9932         {
    9933             Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
    9934             pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
    9935             pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    9936         }
    9937 #endif
    9938     }
    9939 #if 0 /** @todo optimize by simplifying this later... */
    9940     pReNative->Core.bmVars     = 0;
    9941     pReNative->Core.bmStack    = 0;
    9942     pReNative->Core.u64ArgVars = UINT64_MAX;
    9943 #endif
    99441825}
    99451826
     
    100541935*********************************************************************************************************************************/
    100551936
    10056 /**
    10057  * Emits code to load a reference to the given guest register into @a idxGprDst.
    10058  */
    10059 DECL_INLINE_THROW(uint32_t)
    10060 iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
    10061                                IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
    10062 {
    10063 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    10064     /** @todo If we're ever gonna allow referencing the RIP register, we need to update the guest value here. */
    10065 #endif
    10066 
    10067     /*
    10068      * Get the offset relative to the CPUMCTX structure.
    10069      */
    10070     uint32_t offCpumCtx;
    10071     switch (enmClass)
    10072     {
    10073         case kIemNativeGstRegRef_Gpr:
    10074             Assert(idxRegInClass < 16);
    10075             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
    10076             break;
    10077 
    10078         case kIemNativeGstRegRef_GprHighByte:    /**< AH, CH, DH, BH */
    10079             Assert(idxRegInClass < 4);
    10080             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
    10081             break;
    10082 
    10083         case kIemNativeGstRegRef_EFlags:
    10084             Assert(idxRegInClass == 0);
    10085             offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
    10086             break;
    10087 
    10088         case kIemNativeGstRegRef_MxCsr:
    10089             Assert(idxRegInClass == 0);
    10090             offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
    10091             break;
    10092 
    10093         case kIemNativeGstRegRef_FpuReg:
    10094             Assert(idxRegInClass < 8);
    10095             AssertFailed(); /** @todo what kind of indexing? */
    10096             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
    10097             break;
    10098 
    10099         case kIemNativeGstRegRef_MReg:
    10100             Assert(idxRegInClass < 8);
    10101             AssertFailed(); /** @todo what kind of indexing? */
    10102             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
    10103             break;
    10104 
    10105         case kIemNativeGstRegRef_XReg:
    10106             Assert(idxRegInClass < 16);
    10107             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
    10108             break;
    10109 
    10110         default:
    10111             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
    10112     }
    10113 
    10114     /*
    10115      * Load the value into the destination register.
    10116      */
    10117 #ifdef RT_ARCH_AMD64
    10118     off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
    10119 
    10120 #elif defined(RT_ARCH_ARM64)
    10121     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    10122     Assert(offCpumCtx < 4096);
    10123     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
    10124 
    10125 #else
    10126 # error "Port me!"
    10127 #endif
    10128 
    10129     return off;
    10130 }
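
A sketch of a call site (hypothetical; GPR index 3 picked arbitrarily):

    off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
                                         kIemNativeGstRegRef_Gpr, 3 /* rbx */);
    /* ARG0 now points at pVCpu->cpum.GstCtx.aGRegs[3]: a single LEA off the
       pVCpu register on AMD64, an ADD off the fixed CPUMCTX register on ARM64. */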
    10131 
    10132 
    10133 /**
    10134  * Common code for CIMPL and AIMPL calls.
    10135  *
    10136  * These are calls that use argument variables and such.  They should not be
    10137  * confused with internal calls required to implement an MC operation,
    10138  * like a TLB load and similar.
    10139  *
    10140  * Upon return all that is left to do is to load any hidden arguments and
    10141  * perform the call. All argument variables are freed.
    10142  *
    10143  * @returns New code buffer offset; throws VBox status code on error.
    10144  * @param   pReNative       The native recompile state.
    10145  * @param   off             The code buffer offset.
    10146  * @param   cArgs           The total number of arguments (including the
    10147  *                          hidden count).
    10148  * @param   cHiddenArgs     The number of hidden arguments.  The hidden
    10149  *                          arguments must not have any variable declared for
    10150  *                          them, whereas all the regular arguments must
    10151  *                          (tstIEMCheckMc ensures this).
    10152  */
    10153 DECL_HIDDEN_THROW(uint32_t)
    10154 iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
    10155 {
    10156 #ifdef VBOX_STRICT
    10157     /*
    10158      * Assert sanity.
    10159      */
    10160     Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
    10161     Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
    10162     for (unsigned i = 0; i < cHiddenArgs; i++)
    10163         Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
    10164     for (unsigned i = cHiddenArgs; i < cArgs; i++)
    10165     {
    10166         Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
    10167         Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
    10168     }
    10169     iemNativeRegAssertSanity(pReNative);
    10170 #endif
    10171 
    10172     /* We don't know what the called function makes use of, so flush any pending register writes. */
    10173     off = iemNativeRegFlushPendingWrites(pReNative, off);
    10174 
    10175     /*
    10176      * Before we do anything else, go over variables that are referenced and
    10177      * make sure they are not in a register.
    10178      */
    10179     uint32_t bmVars = pReNative->Core.bmVars;
    10180     if (bmVars)
    10181     {
    10182         do
    10183         {
    10184             uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
    10185             bmVars &= ~RT_BIT_32(idxVar);
    10186 
    10187             if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
    10188             {
    10189                 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
    10190                 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
    10191                 {
    10192                     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
    10193                     Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
    10194                            idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
    10195                            idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    10196                     off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
    10197 
    10198                     pReNative->Core.aVars[idxVar].idxReg    = UINT8_MAX;
    10199                     pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxRegOld);
    10200                     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
    10201                     pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
    10202                     pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
    10203                 }
    10204             }
    10205         } while (bmVars != 0);
    10206 #if 0 //def VBOX_STRICT
    10207         iemNativeRegAssertSanity(pReNative);
    10208 #endif
    10209     }
    10210 
    10211     uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
    10212 
    10213     /*
    10214      * First, go over the host registers that will be used for arguments and make
    10215      * sure they either hold the desired argument or are free.
    10216      */
    10217     if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
    10218     {
    10219         for (uint32_t i = 0; i < cRegArgs; i++)
    10220         {
    10221             uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
    10222             if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
    10223             {
    10224                 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
    10225                 {
    10226                     uint8_t const       idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
    10227                     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    10228                     PIEMNATIVEVAR const pVar   = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    10229                     Assert(pVar->idxReg == idxArgReg);
    10230                     uint8_t const       uArgNo = pVar->uArgNo;
    10231                     if (uArgNo == i)
    10232                     { /* perfect */ }
    10233                     /* The variable allocator logic should make sure this is impossible,
    10234                        except for when the return register is used as a parameter (ARM,
    10235                        but not x86). */
    10236 #if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
    10237                     else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
    10238                     {
    10239 # ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    10240 #  error "Implement this"
    10241 # endif
    10242                         Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
    10243                         uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
    10244                         AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
    10245                                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
    10246                         off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
    10247                     }
    10248 #endif
    10249                     else
    10250                     {
    10251                         AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
    10252 
    10253                         if (pVar->enmKind == kIemNativeVarKind_Stack)
    10254                             off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
    10255                         else
    10256                         {
    10257                             /* just free it, can be reloaded if used again */
    10258                             pVar->idxReg               = UINT8_MAX;
    10259                             pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
    10260                             iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
    10261                         }
    10262                     }
    10263                 }
    10264                 else
    10265                     AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
    10266                                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
    10267             }
    10268         }
    10269 #if 0 //def VBOX_STRICT
    10270         iemNativeRegAssertSanity(pReNative);
    10271 #endif
    10272     }
    10273 
    10274     Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
    10275 
    10276 #ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    10277     /*
    10278      * If there are any stack arguments, make sure they are in their place as well.
    10279      *
    10280      * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
    10281      * the caller) will be loading it later and it must be free (see the first loop).
    10282      */
    10283     if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
    10284     {
    10285         for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
    10286         {
    10287             PIEMNATIVEVAR const pVar      = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
    10288             int32_t const       offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
    10289             if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    10290             {
    10291                 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
    10292                 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
    10293                 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
    10294                 pVar->idxReg = UINT8_MAX;
    10295             }
    10296             else
    10297             {
    10298                 /* Use ARG0 as temp for stuff we need registers for. */
    10299                 switch (pVar->enmKind)
    10300                 {
    10301                     case kIemNativeVarKind_Stack:
    10302                     {
    10303                         uint8_t const idxStackSlot = pVar->idxStackSlot;
    10304                         AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    10305                         off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
    10306                                                        iemNativeStackCalcBpDisp(idxStackSlot));
    10307                         off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
    10308                         continue;
    10309                     }
    10310 
    10311                     case kIemNativeVarKind_Immediate:
    10312                         off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
    10313                         continue;
    10314 
    10315                     case kIemNativeVarKind_VarRef:
    10316                     {
    10317                         uint8_t const idxOtherVar    = pVar->u.idxRefVar; /* unpacked */
    10318                         Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
    10319                         uint8_t const idxStackSlot   = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
    10320                         int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
    10321                         uint8_t const idxRegOther    = pReNative->Core.aVars[idxOtherVar].idxReg;
    10322                         if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
    10323                         {
    10324                             off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
    10325                             iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
    10326                             Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
    10327                         }
    10328                         Assert(   pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
    10329                                && pReNative->Core.aVars[idxOtherVar].idxReg       == UINT8_MAX);
    10330                         off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
    10331                         off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
    10332                         continue;
    10333                     }
    10334 
    10335                     case kIemNativeVarKind_GstRegRef:
    10336                         off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
    10337                                                              pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
    10338                         off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
    10339                         continue;
    10340 
    10341                     case kIemNativeVarKind_Invalid:
    10342                     case kIemNativeVarKind_End:
    10343                         break;
    10344                 }
    10345                 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
    10346             }
    10347         }
    10348 # if 0 //def VBOX_STRICT
    10349         iemNativeRegAssertSanity(pReNative);
    10350 # endif
    10351     }
    10352 #else
    10353     AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
    10354 #endif
    10355 
    10356     /*
    10357      * Make sure the argument variables are loaded into their respective registers.
    10358      *
    10359      * We can optimize this by ASSUMING that any register allocations are for
    10360      * registers that have already been loaded and are ready.  The previous step
    10361      * saw to that.
    10362      */
    10363     if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
    10364     {
    10365         for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
    10366         {
    10367             uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
    10368             if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
    10369                 Assert(   pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
    10370                        && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
    10371                        && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
    10372             else
    10373             {
    10374                 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
    10375                 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    10376                 {
    10377                     Assert(pVar->enmKind == kIemNativeVarKind_Stack);
    10378                     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
    10379                     pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
    10380                                               | RT_BIT_32(idxArgReg);
    10381                     pVar->idxReg = idxArgReg;
    10382                 }
    10383                 else
    10384                 {
    10385                     /* Use ARG0 as temp for stuff we need registers for. */
    10386                     switch (pVar->enmKind)
    10387                     {
    10388                         case kIemNativeVarKind_Stack:
    10389                         {
    10390                             uint8_t const idxStackSlot = pVar->idxStackSlot;
    10391                             AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    10392                             off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
    10393                             continue;
    10394                         }
    10395 
    10396                         case kIemNativeVarKind_Immediate:
    10397                             off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
    10398                             continue;
    10399 
    10400                         case kIemNativeVarKind_VarRef:
    10401                         {
    10402                             uint8_t const idxOtherVar    = pVar->u.idxRefVar; /* unpacked */
    10403                             Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
    10404                             uint8_t const idxStackSlot   = iemNativeVarGetStackSlot(pReNative,
    10405                                                                                     IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
    10406                             int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
    10407                             uint8_t const idxRegOther    = pReNative->Core.aVars[idxOtherVar].idxReg;
    10408                             if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
    10409                             {
    10410                                 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
    10411                                 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
    10412                                 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
    10413                             }
    10414                             Assert(   pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
    10415                                    && pReNative->Core.aVars[idxOtherVar].idxReg       == UINT8_MAX);
    10416                             off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
    10417                             continue;
    10418                         }
    10419 
    10420                         case kIemNativeVarKind_GstRegRef:
    10421                             off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
    10422                                                                  pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
    10423                             continue;
    10424 
    10425                         case kIemNativeVarKind_Invalid:
    10426                         case kIemNativeVarKind_End:
    10427                             break;
    10428                     }
    10429                     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
    10430                 }
    10431             }
    10432         }
    10433 #if 0 //def VBOX_STRICT
    10434         iemNativeRegAssertSanity(pReNative);
    10435 #endif
    10436     }
    10437 #ifdef VBOX_STRICT
    10438     else
    10439         for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
    10440         {
    10441             Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
    10442             Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
    10443         }
    10444 #endif
    10445 
    10446     /*
    10447      * Free all argument variables (simplified).
    10448      * Their lifetime always expires with the call they are for.
    10449      */
    10450     /** @todo Make the python script check that arguments aren't used after
    10451      *        IEM_MC_CALL_XXXX. */
    10452     /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
    10453      *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
    10454      *        an argument value.  There is also some FPU stuff. */
    10455     for (uint32_t i = cHiddenArgs; i < cArgs; i++)
    10456     {
    10457         uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
    10458         Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
    10459 
    10460         /* no need to free registers: */
    10461         AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
    10462                   ?    pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
    10463                     || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
    10464                   : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
    10465                   ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
    10466                    i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
    10467 
    10468         pReNative->Core.aidxArgVars[i] = UINT8_MAX;
    10469         pReNative->Core.bmVars        &= ~RT_BIT_32(idxVar);
    10470         iemNativeVarFreeStackSlots(pReNative, idxVar);
    10471     }
    10472     Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
    10473 
    10474     /*
    10475      * Flush volatile registers as we make the call.
    10476      */
    10477     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
    10478 
    10479     return off;
    10480 }
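
A compressed sketch of how the CImpl wrappers drive this worker (cf. iemNativeEmitCallCImplCommon below; assumes IEM_CIMPL_HIDDEN_ARGS is the hidden-argument count):

    off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
    /* ... load the hidden arguments (pVCpu, etc.) and emit the actual call ... */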
    10481 
    10482 
    104831937/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
    10484 DECL_HIDDEN_THROW(uint32_t)
     1938DECL_INLINE_THROW(uint32_t)
    104851939iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
    104861940                             uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
     
    129054359
    129064360/*********************************************************************************************************************************
    12907 *   TLB Lookup.                                                                                                                  *
    12908 *********************************************************************************************************************************/
    12909 
    12910 /**
    12911  * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
    12912  */
    12913 DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
    12914 {
    12915     uint8_t const  iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
    12916     uint8_t const  cbMem   = RT_BYTE2(uSegAndSizeAndAccess);
    12917     uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
    12918     Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
    12919 
    12920     /* Do the lookup manually. */
    12921     RTGCPTR const      GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
    12922     uint64_t const     uTag      = IEMTLB_CALC_TAG(    &pVCpu->iem.s.DataTlb, GCPtrFlat);
    12923     PIEMTLBENTRY const pTlbe     = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
    12924     if (RT_LIKELY(pTlbe->uTag == uTag))
    12925     {
    12926         /*
    12927          * Check TLB page table level access flags.
    12928          */
    12929         AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
    12930         uint64_t const fNoUser          = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
    12931         uint64_t const fNoWriteNoDirty  = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
    12932                                         : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
    12933         uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & (  IEMTLBE_F_PHYS_REV       | IEMTLBE_F_NO_MAPPINGR3
    12934                                                                      | IEMTLBE_F_PG_UNASSIGNED
    12935                                                                      | IEMTLBE_F_PT_NO_ACCESSED
    12936                                                                      | fNoWriteNoDirty          | fNoUser);
    12937         uint64_t const uTlbPhysRev      = pVCpu->iem.s.DataTlb.uTlbPhysRev;
    12938         if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
    12939         {
    12940             /*
    12941              * Return the address.
    12942              */
    12943             uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
    12944             if ((uintptr_t)pbAddr == uResult)
    12945                 return;
    12946             RT_NOREF(cbMem);
    12947             AssertFailed();
    12948         }
    12949         else
    12950             AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
    12951                              fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
    12952     }
    12953     else
    12954         AssertFailed();
    12955     RT_BREAKPOINT();
    12956 }
    12957 
    12958 /* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
    12959 
    12960 
    12961 /*********************************************************************************************************************************
    12962 4361 *   Memory fetches and stores common                                                                                             *
    12963 4362 *********************************************************************************************************************************/
     
    13104 4503      * registers after returning from the call. Not sure if that's sensible or
    13105 4504      * not, though. */
    13106      #ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
          4505 #ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    13107 4506     off = iemNativeRegFlushPendingWrites(pReNative, off);
    13108 4507 #else
     
    15380 6779
    15381 6780 /*********************************************************************************************************************************
    15382      *   The native code generator functions for each MC block.                                                                       *
          6781 *   Include instruction emitters.                                                                                                *
    15383 6782 *********************************************************************************************************************************/
    15384 
    15385 /*
    15386  * Include instruction emitters.
    15387  */
    15388 6783 #include "target-x86/IEMAllN8veEmit-x86.h"
    15389 6784
    15390 /*
    15391  * Include g_apfnIemNativeRecompileFunctions and associated functions.
    15392  *
    15393  * This should probably live in its own file later, but let's see what the
    15394  * compile times turn out to be first.
    15395  */
    15396 #include "IEMNativeFunctions.cpp.h"
    15397 
    15398 
    15399 
    15400 /*********************************************************************************************************************************
    15401 *   Recompiler Core.                                                                                                             *
    15402 *********************************************************************************************************************************/
    15403 
    15404 
    15405 /** @callback_method_impl{FNDISREADBYTES, Dummy.} */
    15406 static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
    15407 {
    15408     RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
    15409     pDis->cbCachedInstr += cbMaxRead;
    15410     RT_NOREF(cbMinRead);
    15411     return VERR_NO_DATA;
    15412 }
    15413 
    15414 
    15415 DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
    15416 {
    15417     static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
    15418     {
    15419 #define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
    15420         ENTRY(fLocalForcedActions),
    15421         ENTRY(iem.s.rcPassUp),
    15422         ENTRY(iem.s.fExec),
    15423         ENTRY(iem.s.pbInstrBuf),
    15424         ENTRY(iem.s.uInstrBufPc),
    15425         ENTRY(iem.s.GCPhysInstrBuf),
    15426         ENTRY(iem.s.cbInstrBufTotal),
    15427         ENTRY(iem.s.idxTbCurInstr),
    15428 #ifdef VBOX_WITH_STATISTICS
    15429         ENTRY(iem.s.StatNativeTlbHitsForFetch),
    15430         ENTRY(iem.s.StatNativeTlbHitsForStore),
    15431         ENTRY(iem.s.StatNativeTlbHitsForStack),
    15432         ENTRY(iem.s.StatNativeTlbHitsForMapped),
    15433         ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
    15434         ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
    15435         ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
    15436         ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
    15437 #endif
    15438         ENTRY(iem.s.DataTlb.aEntries),
    15439         ENTRY(iem.s.DataTlb.uTlbRevision),
    15440         ENTRY(iem.s.DataTlb.uTlbPhysRev),
    15441         ENTRY(iem.s.DataTlb.cTlbHits),
    15442         ENTRY(iem.s.CodeTlb.aEntries),
    15443         ENTRY(iem.s.CodeTlb.uTlbRevision),
    15444         ENTRY(iem.s.CodeTlb.uTlbPhysRev),
    15445         ENTRY(iem.s.CodeTlb.cTlbHits),
    15446         ENTRY(pVMR3),
    15447         ENTRY(cpum.GstCtx.rax),
    15448         ENTRY(cpum.GstCtx.ah),
    15449         ENTRY(cpum.GstCtx.rcx),
    15450         ENTRY(cpum.GstCtx.ch),
    15451         ENTRY(cpum.GstCtx.rdx),
    15452         ENTRY(cpum.GstCtx.dh),
    15453         ENTRY(cpum.GstCtx.rbx),
    15454         ENTRY(cpum.GstCtx.bh),
    15455         ENTRY(cpum.GstCtx.rsp),
    15456         ENTRY(cpum.GstCtx.rbp),
    15457         ENTRY(cpum.GstCtx.rsi),
    15458         ENTRY(cpum.GstCtx.rdi),
    15459         ENTRY(cpum.GstCtx.r8),
    15460         ENTRY(cpum.GstCtx.r9),
    15461         ENTRY(cpum.GstCtx.r10),
    15462         ENTRY(cpum.GstCtx.r11),
    15463         ENTRY(cpum.GstCtx.r12),
    15464         ENTRY(cpum.GstCtx.r13),
    15465         ENTRY(cpum.GstCtx.r14),
    15466         ENTRY(cpum.GstCtx.r15),
    15467         ENTRY(cpum.GstCtx.es.Sel),
    15468         ENTRY(cpum.GstCtx.es.u64Base),
    15469         ENTRY(cpum.GstCtx.es.u32Limit),
    15470         ENTRY(cpum.GstCtx.es.Attr),
    15471         ENTRY(cpum.GstCtx.cs.Sel),
    15472         ENTRY(cpum.GstCtx.cs.u64Base),
    15473         ENTRY(cpum.GstCtx.cs.u32Limit),
    15474         ENTRY(cpum.GstCtx.cs.Attr),
    15475         ENTRY(cpum.GstCtx.ss.Sel),
    15476         ENTRY(cpum.GstCtx.ss.u64Base),
    15477         ENTRY(cpum.GstCtx.ss.u32Limit),
    15478         ENTRY(cpum.GstCtx.ss.Attr),
    15479         ENTRY(cpum.GstCtx.ds.Sel),
    15480         ENTRY(cpum.GstCtx.ds.u64Base),
    15481         ENTRY(cpum.GstCtx.ds.u32Limit),
    15482         ENTRY(cpum.GstCtx.ds.Attr),
    15483         ENTRY(cpum.GstCtx.fs.Sel),
    15484         ENTRY(cpum.GstCtx.fs.u64Base),
    15485         ENTRY(cpum.GstCtx.fs.u32Limit),
    15486         ENTRY(cpum.GstCtx.fs.Attr),
    15487         ENTRY(cpum.GstCtx.gs.Sel),
    15488         ENTRY(cpum.GstCtx.gs.u64Base),
    15489         ENTRY(cpum.GstCtx.gs.u32Limit),
    15490         ENTRY(cpum.GstCtx.gs.Attr),
    15491         ENTRY(cpum.GstCtx.rip),
    15492         ENTRY(cpum.GstCtx.eflags),
    15493         ENTRY(cpum.GstCtx.uRipInhibitInt),
    15494 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    15495         ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
    15496         ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
    15497         ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
    15498         ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
    15499         ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
    15500         ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
    15501         ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
    15502         ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
    15503         ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
    15504         ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
    15505         ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
    15506         ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
    15507         ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
    15508         ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
    15509         ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
    15510         ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
    15511         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
    15512         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
    15513         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
    15514         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
    15515         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
    15516         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
    15517         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
    15518         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
    15519         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
    15520         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
    15521         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
    15522         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
    15523         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
    15524         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
    15525         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
    15526         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
    15527 #endif
    15528 #undef ENTRY
    15529     };
    15530 #ifdef VBOX_STRICT
    15531     static bool s_fOrderChecked = false;
    15532     if (!s_fOrderChecked)
    15533     {
    15534         s_fOrderChecked = true;
    15535         uint32_t offPrev = s_aMembers[0].off;
    15536         for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
    15537         {
    15538             Assert(s_aMembers[i].off > offPrev);
    15539             offPrev = s_aMembers[i].off;
    15540         }
    15541     }
    15542 #endif
    15543 
    15544     /*
    15545      * Binary lookup.
    15546      */
    15547     unsigned iStart = 0;
    15548     unsigned iEnd   = RT_ELEMENTS(s_aMembers);
    15549     for (;;)
    15550     {
    15551         unsigned const iCur   = iStart + (iEnd - iStart) / 2;
    15552         uint32_t const offCur = s_aMembers[iCur].off;
    15553         if (off < offCur)
    15554         {
    15555             if (iCur != iStart)
    15556                 iEnd = iCur;
    15557             else
    15558                 break;
    15559         }
    15560         else if (off > offCur)
    15561         {
    15562             if (iCur + 1 < iEnd)
    15563                 iStart = iCur + 1;
    15564             else
    15565                 break;
    15566         }
    15567         else
    15568             return s_aMembers[iCur].pszName;
    15569     }
    15570 #ifdef VBOX_WITH_STATISTICS
    15571     if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
    15572         return "iem.s.acThreadedFuncStats[iFn]";
    15573 #endif
    15574     return NULL;
    15575 }
    15576 
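A quick usage sketch for the lookup above (the displacement is a made-up value; in the disassembler further down it comes from a decoded [pVCpu + disp] operand, and RTPrintf from iprt/stream.h stands in for whatever output helper is at hand):

    /* Hypothetical displacement into VMCPUCC; 0x123 is made up. */
    uint32_t const    offDisp   = 0x123;
    const char *const pszMember = iemNativeDbgVCpuOffsetToName(offDisp);
    RTPrintf("+%#x -> %s\n", offDisp, pszMember ? pszMember : "<unknown>");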
    15577 
    15578 /**
    15579  * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
    15580  * @returns pszBuf.
    15581  * @param   fFlags  The flags.
    15582  * @param   pszBuf  The output buffer.
    15583  * @param   cbBuf   The output buffer size.  At least 32 bytes.
    15584  */
    15585 DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
    15586 {
    15587     Assert(cbBuf >= 32);
    15588     static RTSTRTUPLE const s_aModes[] =
    15589     {
    15590         /* [00] = */ { RT_STR_TUPLE("16BIT") },
    15591         /* [01] = */ { RT_STR_TUPLE("32BIT") },
    15592         /* [02] = */ { RT_STR_TUPLE("!2!") },
    15593         /* [03] = */ { RT_STR_TUPLE("!3!") },
    15594         /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
    15595         /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
    15596         /* [06] = */ { RT_STR_TUPLE("!6!") },
    15597         /* [07] = */ { RT_STR_TUPLE("!7!") },
    15598         /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
    15599         /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
    15600         /* [0a] = */ { RT_STR_TUPLE("64BIT") },
    15601         /* [0b] = */ { RT_STR_TUPLE("!b!") },
    15602         /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
    15603         /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
    15604         /* [0e] = */ { RT_STR_TUPLE("!e!") },
    15605         /* [0f] = */ { RT_STR_TUPLE("!f!") },
    15606         /* [10] = */ { RT_STR_TUPLE("!10!") },
    15607         /* [11] = */ { RT_STR_TUPLE("!11!") },
    15608         /* [12] = */ { RT_STR_TUPLE("!12!") },
    15609         /* [13] = */ { RT_STR_TUPLE("!13!") },
    15610         /* [14] = */ { RT_STR_TUPLE("!14!") },
    15611         /* [15] = */ { RT_STR_TUPLE("!15!") },
    15612         /* [16] = */ { RT_STR_TUPLE("!16!") },
    15613         /* [17] = */ { RT_STR_TUPLE("!17!") },
    15614         /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
    15615         /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
    15616         /* [1a] = */ { RT_STR_TUPLE("!1a!") },
    15617         /* [1b] = */ { RT_STR_TUPLE("!1b!") },
    15618         /* [1c] = */ { RT_STR_TUPLE("!1c!") },
    15619         /* [1d] = */ { RT_STR_TUPLE("!1d!") },
    15620         /* [1e] = */ { RT_STR_TUPLE("!1e!") },
    15621         /* [1f] = */ { RT_STR_TUPLE("!1f!") },
    15622     };
    15623     AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
    15624     memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
    15625     size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
    15626 
    15627     pszBuf[off++] = ' ';
    15628     pszBuf[off++] = 'C';
    15629     pszBuf[off++] = 'P';
    15630     pszBuf[off++] = 'L';
    15631     pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
    15632     Assert(off < 32);
    15633 
    15634     fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
    15635 
    15636     static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
    15637     {
    15638         { RT_STR_TUPLE("BYPASS_HANDLERS"),      IEM_F_BYPASS_HANDLERS    },
    15639         { RT_STR_TUPLE("PENDING_BRK_INSTR"),    IEM_F_PENDING_BRK_INSTR  },
    15640         { RT_STR_TUPLE("PENDING_BRK_DATA"),     IEM_F_PENDING_BRK_DATA   },
    15641         { RT_STR_TUPLE("PENDING_BRK_X86_IO"),   IEM_F_PENDING_BRK_X86_IO },
    15642         { RT_STR_TUPLE("X86_DISREGARD_LOCK"),   IEM_F_X86_DISREGARD_LOCK },
    15643         { RT_STR_TUPLE("X86_CTX_VMX"),          IEM_F_X86_CTX_VMX        },
    15644         { RT_STR_TUPLE("X86_CTX_SVM"),          IEM_F_X86_CTX_SVM        },
    15645         { RT_STR_TUPLE("X86_CTX_IN_GUEST"),     IEM_F_X86_CTX_IN_GUEST   },
    15646         { RT_STR_TUPLE("X86_CTX_SMM"),          IEM_F_X86_CTX_SMM        },
    15647         { RT_STR_TUPLE("INHIBIT_SHADOW"),       IEMTB_F_INHIBIT_SHADOW   },
    15648         { RT_STR_TUPLE("INHIBIT_NMI"),          IEMTB_F_INHIBIT_NMI      },
    15649         { RT_STR_TUPLE("CS_LIM_CHECKS"),        IEMTB_F_CS_LIM_CHECKS    },
    15650         { RT_STR_TUPLE("TYPE_THREADED"),        IEMTB_F_TYPE_THREADED    },
    15651         { RT_STR_TUPLE("TYPE_NATIVE"),          IEMTB_F_TYPE_NATIVE      },
    15652     };
    15653     if (fFlags)
    15654         for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
    15655             if (s_aFlags[i].fFlag & fFlags)
    15656             {
    15657                 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
    15658                 pszBuf[off++] = ' ';
    15659                 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
    15660                 off += s_aFlags[i].cchName;
    15661                 fFlags &= ~s_aFlags[i].fFlag;
    15662                 if (!fFlags)
    15663                     break;
    15664             }
    15665     pszBuf[off] = '\0';
    15666 
    15667     return pszBuf;
    15668 }
    15669 
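Usage sketch for the formatter above, assuming a PCIEMTB pTb is in scope; per the doc comment the buffer must be at least 32 bytes:

    /* Format a TB's flags for logging into a stack buffer. */
    char szFlags[64];
    RTPrintf("fFlags=%#010x %s\n", pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szFlags, sizeof(szFlags)));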
    15670 
    15671 DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
    15672 {
    15673     AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
    15674 #if defined(RT_ARCH_AMD64)
    15675     static const char * const a_apszMarkers[] =
    15676     {
    15677         /*[0]=*/ "unknown0",        "CheckCsLim",           "ConsiderLimChecking",  "CheckOpcodes",
    15678         /*[4]=*/ "PcAfterBranch",   "LoadTlbForNewPage",    "LoadTlbAfterBranch"
    15679     };
    15680 #endif
    15681 
    15682     char                    szDisBuf[512];
    15683     DISSTATE                Dis;
    15684     PCIEMNATIVEINSTR const  paNative      = pTb->Native.paInstructions;
    15685     uint32_t const          cNative       = pTb->Native.cInstructions;
    15686     uint32_t                offNative     = 0;
    15687 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    15688     PCIEMTBDBG const        pDbgInfo      = pTb->pDbgInfo;
    15689 #endif
    15690     DISCPUMODE              enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
    15691                                           : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
    15692                                           :                                                            DISCPUMODE_64BIT;
    15693 #if   defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
    15694     DISCPUMODE const        enmHstCpuMode = DISCPUMODE_64BIT;
    15695 #elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
    15696     DISCPUMODE const        enmHstCpuMode = DISCPUMODE_ARMV8_A64;
    15697 #elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
    15698 # error "Port me"
    15699 #else
    15700     csh                     hDisasm       = ~(size_t)0;
    15701 # if defined(RT_ARCH_AMD64)
    15702     cs_err                  rcCs          = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
    15703 # elif defined(RT_ARCH_ARM64)
    15704     cs_err                  rcCs          = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
    15705 # else
    15706 #  error "Port me"
    15707 # endif
    15708     AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
    15709 
    15710     //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON);  - not needed as pInstr->detail doesn't provide full memory detail.
    15711     //Assert(rcCs == CS_ERR_OK);
    15712 #endif
    15713 
    15714     /*
    15715      * Print TB info.
    15716      */
    15717     pHlp->pfnPrintf(pHlp,
    15718                     "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
    15719                     "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
    15720                     pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
    15721                     pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
    15722 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    15723     if (pDbgInfo && pDbgInfo->cEntries > 1)
    15724     {
    15725         Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
    15726 
    15727         /*
    15728          * This disassembly is driven by the debug info which follows the native
    15729          * code and indicates when it starts with the next guest instructions,
    15730          * where labels are and such things.
    15731          */
    15732         uint32_t                idxThreadedCall  = 0;
    15733         uint32_t                fExec            = pTb->fFlags & UINT32_C(0x00ffffff);
    15734         uint8_t                 idxRange         = UINT8_MAX;
    15735         uint8_t const           cRanges          = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
    15736         uint32_t                offRange         = 0;
    15737         uint32_t                offOpcodes       = 0;
    15738         uint32_t const          cbOpcodes        = pTb->cbOpcodes;
    15739         RTGCPHYS                GCPhysPc         = pTb->GCPhysPc;
    15740         uint32_t const          cDbgEntries      = pDbgInfo->cEntries;
    15741         uint32_t                iDbgEntry        = 1;
    15742         uint32_t                offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
    15743 
    15744         while (offNative < cNative)
    15745         {
    15746             /* If we're at or have passed the point where the next chunk of debug
    15747                info starts, process it. */
    15748             if (offDbgNativeNext <= offNative)
    15749             {
    15750                 offDbgNativeNext = UINT32_MAX;
    15751                 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
    15752                 {
    15753                     switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
    15754                     {
    15755                         case kIemTbDbgEntryType_GuestInstruction:
    15756                         {
    15757                             /* Did the exec flag change? */
    15758                             if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
    15759                             {
    15760                                 pHlp->pfnPrintf(pHlp,
    15761                                                 "  fExec change %#08x -> %#08x %s\n",
    15762                                                 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
    15763                                                 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
    15764                                                                    szDisBuf, sizeof(szDisBuf)));
    15765                                 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
    15766                                 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
    15767                                               : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
    15768                                               :                                                      DISCPUMODE_64BIT;
    15769                             }
    15770 
    15771                             /* New opcode range? We need to fend off a spurious debug info entry here for cases
    15772                                where the compilation was aborted before the opcode was recorded and the actual
    15773                                instruction was translated to a threaded call.  This may happen when we run out
    15774                                of ranges, or when some complicated interrupts/FFs are found to be pending or
    15775                                similar.  So, we just deal with it here rather than in the compiler code as it
    15776                                is a lot simpler to do here. */
    15777                             if (   idxRange == UINT8_MAX
    15778                                 || idxRange >= cRanges
    15779                                 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
    15780                             {
    15781                                 idxRange += 1;
    15782                                 if (idxRange < cRanges)
    15783                                     offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
    15784                                 else
    15785                                     continue;
    15786                                 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
    15787                                 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
    15788                                          + (pTb->aRanges[idxRange].idxPhysPage == 0
    15789                                             ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
    15790                                             : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
    15791                                 pHlp->pfnPrintf(pHlp, "  Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
    15792                                                 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
    15793                                                 pTb->aRanges[idxRange].idxPhysPage);
    15794                                 GCPhysPc += offRange;
    15795                             }
    15796 
    15797                             /* Disassemble the instruction. */
    15798                             //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
    15799                             uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
    15800                             uint32_t      cbInstr    = 1;
    15801                             int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
    15802                                                                  &pTb->pabOpcodes[offOpcodes], cbInstrMax,
    15803                                                                  iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
    15804                             if (RT_SUCCESS(rc))
    15805                             {
    15806                                 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    15807                                                              DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    15808                                                              | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    15809                                                              NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    15810 
    15811                                 static unsigned const s_offMarker  = 55;
    15812                                 static char const     s_szMarker[] = " ; <--- guest";
    15813                                 if (cch < s_offMarker)
    15814                                 {
    15815                                     memset(&szDisBuf[cch], ' ', s_offMarker - cch);
    15816                                     cch = s_offMarker;
    15817                                 }
    15818                                 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
    15819                                     memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
    15820 
    15821                                 pHlp->pfnPrintf(pHlp, "  %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
    15822                             }
    15823                             else
    15824                             {
    15825                                 pHlp->pfnPrintf(pHlp, "  %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
    15826                                                 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
    15827                                 cbInstr = 1;
    15828                             }
    15829                             GCPhysPc   += cbInstr;
    15830                             offOpcodes += cbInstr;
    15831                             offRange   += cbInstr;
    15832                             continue;
    15833                         }
    15834 
    15835                         case kIemTbDbgEntryType_ThreadedCall:
    15836                             pHlp->pfnPrintf(pHlp,
    15837                                             "  Call #%u to %s (%u args) - %s\n",
    15838                                             idxThreadedCall,
    15839                                             g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
    15840                                             g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
    15841                                             pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
    15842                             idxThreadedCall++;
    15843                             continue;
    15844 
    15845                         case kIemTbDbgEntryType_GuestRegShadowing:
    15846                         {
    15847                             PCIEMTBDBGENTRY const pEntry    = &pDbgInfo->aEntries[iDbgEntry];
    15848                             const char * const    pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
    15849                             if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
    15850                                 pHlp->pfnPrintf(pHlp, "  Guest register %s != host register %s\n", pszGstReg,
    15851                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
    15852                             else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
    15853                                 pHlp->pfnPrintf(pHlp, "  Guest register %s == host register %s\n", pszGstReg,
    15854                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
    15855                             else
    15856                                 pHlp->pfnPrintf(pHlp, "  Guest register %s == host register %s (previously in %s)\n", pszGstReg,
    15857                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
    15858                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
    15859                             continue;
    15860                         }
    15861 
    15862 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    15863                         case kIemTbDbgEntryType_GuestSimdRegShadowing:
    15864                         {
    15865                             PCIEMTBDBGENTRY const pEntry    = &pDbgInfo->aEntries[iDbgEntry];
    15866                             const char * const    pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
    15867                             if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
    15868                                 pHlp->pfnPrintf(pHlp, "  Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
    15869                                                 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
    15870                             else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
    15871                                 pHlp->pfnPrintf(pHlp, "  Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
    15872                                                 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
    15873                             else
    15874                                 pHlp->pfnPrintf(pHlp, "  Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
    15875                                                 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
    15876                                                 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
    15877                             continue;
    15878                         }
    15879 #endif
    15880 
    15881                         case kIemTbDbgEntryType_Label:
    15882                         {
    15883                             const char *pszName    = "what_the_fudge";
    15884                             const char *pszComment = "";
    15885                             bool        fNumbered  = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
    15886                             switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
    15887                             {
    15888                                 case kIemNativeLabelType_Return:
    15889                                     pszName = "Return";
    15890                                     break;
    15891                                 case kIemNativeLabelType_ReturnBreak:
    15892                                     pszName = "ReturnBreak";
    15893                                     break;
    15894                                 case kIemNativeLabelType_ReturnWithFlags:
    15895                                     pszName = "ReturnWithFlags";
    15896                                     break;
    15897                                 case kIemNativeLabelType_NonZeroRetOrPassUp:
    15898                                     pszName = "NonZeroRetOrPassUp";
    15899                                     break;
    15900                                 case kIemNativeLabelType_RaiseGp0:
    15901                                     pszName = "RaiseGp0";
    15902                                     break;
    15903                                 case kIemNativeLabelType_RaiseNm:
    15904                                     pszName = "RaiseNm";
    15905                                     break;
    15906                                 case kIemNativeLabelType_RaiseUd:
    15907                                     pszName = "RaiseUd";
    15908                                     break;
    15909                                 case kIemNativeLabelType_RaiseMf:
    15910                                     pszName = "RaiseMf";
    15911                                     break;
    15912                                 case kIemNativeLabelType_RaiseXf:
    15913                                     pszName = "RaiseXf";
    15914                                     break;
    15915                                 case kIemNativeLabelType_ObsoleteTb:
    15916                                     pszName = "ObsoleteTb";
    15917                                     break;
    15918                                 case kIemNativeLabelType_NeedCsLimChecking:
    15919                                     pszName = "NeedCsLimChecking";
    15920                                     break;
    15921                                 case kIemNativeLabelType_CheckBranchMiss:
    15922                                     pszName = "CheckBranchMiss";
    15923                                     break;
    15924                                 case kIemNativeLabelType_If:
    15925                                     pszName = "If";
    15926                                     fNumbered = true;
    15927                                     break;
    15928                                 case kIemNativeLabelType_Else:
    15929                                     pszName = "Else";
    15930                                     fNumbered = true;
    15931                                     pszComment = "   ; regs state restored pre-if-block";
    15932                                     break;
    15933                                 case kIemNativeLabelType_Endif:
    15934                                     pszName = "Endif";
    15935                                     fNumbered = true;
    15936                                     break;
    15937                                 case kIemNativeLabelType_CheckIrq:
    15938                                     pszName = "CheckIrq_CheckVM";
    15939                                     fNumbered = true;
    15940                                     break;
    15941                                 case kIemNativeLabelType_TlbLookup:
    15942                                     pszName = "TlbLookup";
    15943                                     fNumbered = true;
    15944                                     break;
    15945                                 case kIemNativeLabelType_TlbMiss:
    15946                                     pszName = "TlbMiss";
    15947                                     fNumbered = true;
    15948                                     break;
    15949                                 case kIemNativeLabelType_TlbDone:
    15950                                     pszName = "TlbDone";
    15951                                     fNumbered = true;
    15952                                     break;
    15953                                 case kIemNativeLabelType_Invalid:
    15954                                 case kIemNativeLabelType_End:
    15955                                     break;
    15956                             }
    15957                             if (fNumbered)
    15958                                 pHlp->pfnPrintf(pHlp, "  %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
    15959                             else
    15960                                 pHlp->pfnPrintf(pHlp, "  %s:\n", pszName);
    15961                             continue;
    15962                         }
    15963 
    15964                         case kIemTbDbgEntryType_NativeOffset:
    15965                             offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
    15966                             Assert(offDbgNativeNext > offNative);
    15967                             break;
    15968 
    15969 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    15970                         case kIemTbDbgEntryType_DelayedPcUpdate:
    15971                             pHlp->pfnPrintf(pHlp,
    15972                                             "  Updating guest PC value by %u (cInstrSkipped=%u)\n",
    15973                                             pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
    15974                                             pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
    15975                             continue;
    15976 #endif
    15977 
    15978                         default:
    15979                             AssertFailed();
    15980                     }
    15981                     iDbgEntry++;
    15982                     break;
    15983                 }
    15984             }
    15985 
    15986             /*
    15987              * Disassemble the next native instruction.
    15988              */
    15989             PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
    15990 # ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    15991             uint32_t               cbInstr    = sizeof(paNative[0]);
    15992             int const              rc         = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
    15993             if (RT_SUCCESS(rc))
    15994             {
    15995 #  if defined(RT_ARCH_AMD64)
    15996                 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
    15997                 {
    15998                     uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
    15999                     if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
    16000                         pHlp->pfnPrintf(pHlp, "    %p: nop ; marker: call #%u to %s (%u args) - %s\n",
    16001                                         pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
    16002                                         g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
    16003                                         uInfo & 0x8000 ? "recompiled" : "todo");
    16004                     else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
    16005                         pHlp->pfnPrintf(pHlp, "    %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
    16006                     else
    16007                         pHlp->pfnPrintf(pHlp, "    %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
    16008                 }
    16009                 else
    16010 #  endif
    16011                 {
    16012                     const char *pszAnnotation = NULL;
    16013 #  ifdef RT_ARCH_AMD64
    16014                     DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    16015                                     DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    16016                                     | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    16017                                     NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    16018                     PCDISOPPARAM pMemOp;
    16019                     if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
    16020                         pMemOp = &Dis.Param1;
    16021                     else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
    16022                         pMemOp = &Dis.Param2;
    16023                     else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
    16024                         pMemOp = &Dis.Param3;
    16025                     else
    16026                         pMemOp = NULL;
    16027                     if (   pMemOp
    16028                         && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
    16029                         && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
    16030                         pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
    16031                                                                      ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
    16032 
    16033 #  elif defined(RT_ARCH_ARM64)
    16034                     DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
    16035                                      DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    16036                                      NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    16037 #  else
    16038 #   error "Port me"
    16039 #  endif
    16040                     if (pszAnnotation)
    16041                     {
    16042                         static unsigned const s_offAnnotation = 55;
    16043                         size_t const          cchAnnotation   = strlen(pszAnnotation);
    16044                         size_t                cchDis          = strlen(szDisBuf);
    16045                         if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
    16046                         {
    16047                             if (cchDis < s_offAnnotation)
    16048                             {
    16049                                 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
    16050                                 cchDis = s_offAnnotation;
    16051                             }
    16052                             szDisBuf[cchDis++] = ' ';
    16053                             szDisBuf[cchDis++] = ';';
    16054                             szDisBuf[cchDis++] = ' ';
    16055                             memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
    16056                         }
    16057                     }
    16058                     pHlp->pfnPrintf(pHlp, "    %p: %s\n", pNativeCur, szDisBuf);
    16059                 }
    16060             }
    16061             else
    16062             {
    16063 #  if defined(RT_ARCH_AMD64)
    16064                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %Rrc\n",
    16065                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
    16066 #  elif defined(RT_ARCH_ARM64)
    16067                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
    16068 #  else
    16069 #   error "Port me"
    16070 #  endif
    16071                 cbInstr = sizeof(paNative[0]);
    16072             }
    16073             offNative += cbInstr / sizeof(paNative[0]);
    16074 
    16075 # else  /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    16076             cs_insn *pInstr;
    16077             size_t   cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
    16078                                          (uintptr_t)pNativeCur, 1, &pInstr);
    16079             if (cInstrs > 0)
    16080             {
    16081                 Assert(cInstrs == 1);
    16082                 const char *pszAnnotation = NULL;
    16083 #  if defined(RT_ARCH_ARM64)
    16084                 if (   (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
    16085                     || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
    16086                 {
    16087                     /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
    16088                     AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
    16089                     char *psz = strchr(pInstr->op_str, '[');
    16090                     if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
    16091                     {
    16092                         uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
    16093                         int32_t        off     = -1;
    16094                         psz += 4;
    16095                         if (*psz == ']')
    16096                             off = 0;
    16097                         else if (*psz == ',')
    16098                         {
    16099                             psz = RTStrStripL(psz + 1);
    16100                             if (*psz == '#')
    16101                                 off = RTStrToInt32(&psz[1]);
    16102                             /** @todo deal with index registers and LSL as well... */
    16103                         }
    16104                         if (off >= 0)
    16105                             pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
    16106                     }
    16107                 }
    16108 #  endif
    16109 
    16110                 size_t const cchOp = strlen(pInstr->op_str);
    16111 #  if defined(RT_ARCH_AMD64)
    16112                 if (pszAnnotation)
    16113                     pHlp->pfnPrintf(pHlp, "    %p: %.*Rhxs %-7s %s%*s ; %s\n",
    16114                                     pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
    16115                                     cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
    16116                 else
    16117                     pHlp->pfnPrintf(pHlp, "    %p: %.*Rhxs %-7s %s\n",
    16118                                     pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
    16119 
    16120 #  else
    16121                 if (pszAnnotation)
    16122                     pHlp->pfnPrintf(pHlp, "    %p: %#010RX32 %-7s %s%*s ; %s\n",
    16123                                     pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
    16124                                     cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
    16125                 else
    16126                     pHlp->pfnPrintf(pHlp, "    %p: %#010RX32 %-7s %s\n",
    16127                                     pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
    16128 #  endif
    16129                 offNative += pInstr->size / sizeof(*pNativeCur);
    16130                 cs_free(pInstr, cInstrs);
    16131             }
    16132             else
    16133             {
    16134 #  if defined(RT_ARCH_AMD64)
    16135                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %d\n",
    16136                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
    16137 #  else
    16138                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
    16139 #  endif
    16140                 offNative++;
    16141             }
    16142 # endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    16143         }
    16144     }
    16145     else
    16146 #endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
    16147     {
    16148         /*
    16149          * No debug info, just disassemble the x86 code and then the native code.
    16150          *
    16151          * First the guest code:
    16152          */
    16153         for (unsigned i = 0; i < pTb->cRanges; i++)
    16154         {
    16155             RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
    16156                               + (pTb->aRanges[i].idxPhysPage == 0
    16157                                  ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
    16158                                  : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
    16159             pHlp->pfnPrintf(pHlp, "  Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
    16160                             i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
    16161             unsigned       off       = pTb->aRanges[i].offOpcodes;
    16162             /** @todo this ain't working when crossing pages!   */
    16163             unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
    16164             while (off < cbOpcodes)
    16165             {
    16166                 uint32_t cbInstr = 1;
    16167                 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
    16168                                                      &pTb->pabOpcodes[off], cbOpcodes - off,
    16169                                                      iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
    16170                 if (RT_SUCCESS(rc))
    16171                 {
    16172                     DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    16173                                     DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    16174                                     | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    16175                                     NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    16176                     pHlp->pfnPrintf(pHlp, "    %RGp: %s\n", GCPhysPc, szDisBuf);
    16177                     GCPhysPc += cbInstr;
    16178                     off      += cbInstr;
    16179                 }
    16180                 else
    16181                 {
    16182                     pHlp->pfnPrintf(pHlp, "    %RGp: %.*Rhxs - disassembly failure %Rrc\n",
    16183                                     GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
    16184                     break;
    16185                 }
    16186             }
    16187         }
    16188 
    16189         /*
    16190          * Then the native code:
    16191          */
    16192         pHlp->pfnPrintf(pHlp, "  Native code %p L %#x\n", paNative, cNative);
    16193         while (offNative < cNative)
    16194         {
    16195             PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
    16196 # ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    16197             uint32_t               cbInstr    = sizeof(paNative[0]);
    16198             int const              rc         = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
    16199             if (RT_SUCCESS(rc))
    16200             {
    16201 #  if defined(RT_ARCH_AMD64)
    16202                 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
    16203                 {
    16204                     uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
    16205                     if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
    16206                         pHlp->pfnPrintf(pHlp, "\n    %p: nop ; marker: call #%u to %s (%u args) - %s\n",
    16207                                         pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
    16208                                         g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
    16209                                         uInfo & 0x8000 ? "recompiled" : "todo");
    16210                     else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
    16211                         pHlp->pfnPrintf(pHlp, "    %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
    16212                     else
    16213                         pHlp->pfnPrintf(pHlp, "    %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
    16214                 }
    16215                 else
    16216 #  endif
    16217                 {
    16218 #  ifdef RT_ARCH_AMD64
    16219                     DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    16220                                     DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    16221                                     | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    16222                                     NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    16223 #  elif defined(RT_ARCH_ARM64)
    16224                     DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
    16225                                      DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    16226                                      NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    16227 #  else
    16228 #   error "Port me"
    16229 #  endif
    16230                     pHlp->pfnPrintf(pHlp, "    %p: %s\n", pNativeCur, szDisBuf);
    16231                 }
    16232             }
    16233             else
    16234             {
    16235 #  if defined(RT_ARCH_AMD64)
    16236                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %Rrc\n",
    16237                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
    16238 #  else
    16239                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
    16240 #  endif
    16241                 cbInstr = sizeof(paNative[0]);
    16242             }
    16243             offNative += cbInstr / sizeof(paNative[0]);
    16244 
    16245 # else  /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    16246             cs_insn *pInstr;
    16247             size_t   cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
    16248                                          (uintptr_t)pNativeCur, 1, &pInstr);
    16249             if (cInstrs > 0)
    16250             {
    16251                 Assert(cInstrs == 1);
    16252 #  if defined(RT_ARCH_AMD64)
    16253                 pHlp->pfnPrintf(pHlp, "    %p: %.*Rhxs %-7s %s\n",
    16254                                 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
    16255 #  else
    16256                 pHlp->pfnPrintf(pHlp, "    %p: %#010RX32 %-7s %s\n",
    16257                                 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
    16258 #  endif
    16259                 offNative += pInstr->size / sizeof(*pNativeCur);
    16260                 cs_free(pInstr, cInstrs);
    16261             }
    16262             else
    16263             {
    16264 #  if defined(RT_ARCH_AMD64)
    16265                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %d\n",
    16266                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
    16267 #  else
    16268                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
    16269 #  endif
    16270                 offNative++;
    16271             }
    16272 # endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    16273         }
    16274     }
    16275 
    16276 #ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    16277     /* Cleanup. */
    16278     cs_close(&hDisasm);
    16279 #endif
    16280 }
    16281 
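For reference, the Capstone branch above follows the library's standard cs_open()/cs_disasm()/cs_free()/cs_close() pattern. A minimal standalone sketch of that pattern, assuming an ARM64 target; the code bytes and base address are made-up illustration, not VBox code:

#include <stdio.h>
#include <inttypes.h>
#include <capstone/capstone.h>

int main(void)
{
    /* Two hand-assembled ARM64 instructions: mov x0, #1 ; ret */
    static const uint8_t s_abCode[] = { 0x20, 0x00, 0x80, 0xd2, 0xc0, 0x03, 0x5f, 0xd6 };

    csh hDisasm;
    if (cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm) != CS_ERR_OK)
        return 1;

    cs_insn *paInstrs;
    size_t const cInstrs = cs_disasm(hDisasm, s_abCode, sizeof(s_abCode),
                                     0x1000 /*address*/, 0 /*all*/, &paInstrs);
    for (size_t i = 0; i < cInstrs; i++)
        printf("%#" PRIx64 ": %-7s %s\n", paInstrs[i].address, paInstrs[i].mnemonic, paInstrs[i].op_str);

    cs_free(paInstrs, cInstrs);
    cs_close(&hDisasm);
    return 0;
}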
    16282 
    16283 /**
    16284  * Recompiles the given threaded TB into a native one.
    16285  *
    16286  * In case of failure the translation block will be returned as-is.
    16287  *
    16288  * @returns pTb.
    16289  * @param   pVCpu   The cross context virtual CPU structure of the calling
    16290  *                  thread.
    16291  * @param   pTb     The threaded translation to recompile to native.
    16292  */
    16293 DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
    16294 {
    16295     STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
    16296 
    16297     /*
    16298      * The first time thru, we allocate the recompiler state, the other times
    16299      * we just need to reset it before using it again.
    16300      */
    16301     PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    16302     if (RT_LIKELY(pReNative))
    16303         iemNativeReInit(pReNative, pTb);
    16304     else
    16305     {
    16306         pReNative = iemNativeInit(pVCpu, pTb);
    16307         AssertReturn(pReNative, pTb);
    16308     }
    16309 
    16310 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    16311     /*
    16312      * First do liveness analysis.  This is done backwards.
    16313      */
    16314     {
    16315         uint32_t idxCall = pTb->Thrd.cCalls;
    16316         if (idxCall <= pReNative->cLivenessEntriesAlloc)
    16317         { /* likely */ }
    16318         else
    16319         {
    16320             uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
    16321             while (idxCall > cAlloc)
    16322                 cAlloc *= 2;
    16323             void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
    16324             AssertReturn(pvNew, pTb);
    16325             pReNative->paLivenessEntries     = (PIEMLIVENESSENTRY)pvNew;
    16326             pReNative->cLivenessEntriesAlloc = cAlloc;
    16327         }
    16328         AssertReturn(idxCall > 0, pTb);
    16329         PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
    16330 
    16331         /* The initial (final) entry. */
    16332         idxCall--;
    16333         IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
    16334 
    16335         /* Loop backwards thru the calls and fill in the other entries. */
    16336         PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
    16337         while (idxCall > 0)
    16338         {
    16339             PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
    16340             if (pfnLiveness)
    16341                 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
    16342             else
    16343                 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
    16344             pCallEntry--;
    16345             idxCall--;
    16346         }
    16347 
    16348 # ifdef VBOX_WITH_STATISTICS
    16349         /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
    16350            to 'clobbered' rather than 'input'.  */
    16351         /** @todo */
    16352 # endif
    16353     }
    16354 #endif
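    /* Note on the walk above: the final liveness entry is seeded as 'unused'
       and each earlier entry is derived from its successor, either by the
       call's own liveness function or, when none exists, by the conservative
       exception-or-call initializer.  Entry i thus summarizes how the calls
       following call i use each guest register. */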
    16355 
    16356     /*
    16357      * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
    16358      * for aborting if an error happens.
    16359      */
    16360     uint32_t        cCallsLeft = pTb->Thrd.cCalls;
    16361 #ifdef LOG_ENABLED
    16362     uint32_t const  cCallsOrg  = cCallsLeft;
    16363 #endif
    16364     uint32_t        off        = 0;
    16365     int             rc         = VINF_SUCCESS;
    16366     IEMNATIVE_TRY_SETJMP(pReNative, rc)
    16367     {
    16368         /*
    16369          * Emit prolog code (fixed).
    16370          */
    16371         off = iemNativeEmitProlog(pReNative, off);
    16372 
    16373         /*
    16374          * Convert the calls to native code.
    16375          */
    16376 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    16377         int32_t              iGstInstr        = -1;
    16378 #endif
    16379 #ifndef VBOX_WITHOUT_RELEASE_STATISTICS
    16380         uint32_t             cThreadedCalls   = 0;
    16381         uint32_t             cRecompiledCalls = 0;
    16382 #endif
    16383 #if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
    16384         uint32_t             idxCurCall       = 0;
    16385 #endif
    16386         PCIEMTHRDEDCALLENTRY pCallEntry       = pTb->Thrd.paCalls;
    16387         pReNative->fExec                      = pTb->fFlags & IEMTB_F_IEM_F_MASK;
    16388         while (cCallsLeft-- > 0)
    16389         {
    16390             PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
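            /* A NULL entry in g_apfnIemNativeRecompileFunctions means no native
               recompiler has been written for this threaded function yet; the
               fallback below then emits a call to the threaded helper instead. */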
    16391 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    16392             pReNative->idxCurCall                 = idxCurCall;
    16393 #endif
    16394 
    16395             /*
    16396              * Debug info, assembly markup and statistics.
    16397              */
    16398 #if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
    16399             if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
    16400                 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
    16401 #endif
    16402 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    16403             iemNativeDbgInfoAddNativeOffset(pReNative, off);
    16404             if (iGstInstr < (int32_t)pCallEntry->idxInstr)
    16405             {
    16406                 if (iGstInstr < (int32_t)pTb->cInstructions)
    16407                     iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
    16408                 else
    16409                     Assert(iGstInstr == pTb->cInstructions);
    16410                 iGstInstr = pCallEntry->idxInstr;
    16411             }
    16412             iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
    16413 #endif
    16414 #if defined(VBOX_STRICT)
    16415             off = iemNativeEmitMarker(pReNative, off,
    16416                                       RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
    16417 #endif
    16418 #if defined(VBOX_STRICT)
    16419             iemNativeRegAssertSanity(pReNative);
    16420 #endif
    16421 #ifdef VBOX_WITH_STATISTICS
    16422             off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
    16423 #endif
    16424 
    16425             /*
    16426              * Actual work.
    16427              */
    16428             Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
    16429                   pfnRecom ? "(recompiled)" : "(todo)"));
    16430             if (pfnRecom) /** @todo stats on this.   */
    16431             {
    16432                 off = pfnRecom(pReNative, off, pCallEntry);
    16433                 STAM_REL_STATS({cRecompiledCalls++;});
    16434             }
    16435             else
    16436             {
    16437                 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
    16438                 STAM_REL_STATS({cThreadedCalls++;});
    16439             }
    16440             Assert(off <= pReNative->cInstrBufAlloc);
    16441             Assert(pReNative->cCondDepth == 0);
    16442 
    16443 #if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
    16444             if (LogIs2Enabled())
    16445             {
    16446                 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
    16447 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    16448                 static const char s_achState[] = "CUXI";
    16449 # else
    16450                 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
    16451 # endif
    16452 
    16453                 char szGpr[17];
    16454                 for (unsigned i = 0; i < 16; i++)
    16455                     szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
    16456                 szGpr[16] = '\0';
    16457 
    16458                 char szSegBase[X86_SREG_COUNT + 1];
    16459                 char szSegLimit[X86_SREG_COUNT + 1];
    16460                 char szSegAttrib[X86_SREG_COUNT + 1];
    16461                 char szSegSel[X86_SREG_COUNT + 1];
    16462                 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
    16463                 {
    16464                     szSegBase[i]   = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
    16465                     szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
    16466                     szSegLimit[i]  = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
    16467                     szSegSel[i]    = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
    16468                 }
    16469                 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
    16470                     = szSegSel[X86_SREG_COUNT] = '\0';
    16471 
    16472                 char szEFlags[8];
    16473                 for (unsigned i = 0; i < 7; i++)
    16474                     szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
    16475                 szEFlags[7] = '\0';
    16476 
    16477                 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
    16478                       szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
    16479             }
    16480 #endif
    16481 
    16482             /*
    16483              * Advance.
    16484              */
    16485             pCallEntry++;
    16486 #if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
    16487             idxCurCall++;
    16488 #endif
    16489         }
    16490 
    16491         STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
    16492         STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded,   cThreadedCalls);
    16493         if (!cThreadedCalls)
    16494             STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
    16495 
    16496         /*
    16497          * Emit the epilog code.
    16498          */
    16499         uint32_t idxReturnLabel;
    16500         off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
    16501 
    16502         /*
    16503          * Generate special jump labels.
    16504          */
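        /* Only the tail code for label types actually requested during
           recompilation (bit set in bmLabelTypes) gets emitted. */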
    16505         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
    16506             off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
    16507         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
    16508             off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
    16509         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
    16510             off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
    16511         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
    16512             off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
    16513         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
    16514             off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
    16515         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
    16516             off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
    16517         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseXf))
    16518             off = iemNativeEmitRaiseXf(pReNative, off, idxReturnLabel);
    16519         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
    16520             off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
    16521         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
    16522             off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
    16523         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
    16524             off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
    16525     }
    16526     IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
    16527     {
    16528         Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
    16529         return pTb;
    16530     }
    16531     IEMNATIVE_CATCH_LONGJMP_END(pReNative);
    16532     Assert(off <= pReNative->cInstrBufAlloc);
    16533 
    16534     /*
    16535      * Make sure all labels have been defined.
    16536      */
    16537     PIEMNATIVELABEL const paLabels = pReNative->paLabels;
    16538 #ifdef VBOX_STRICT
    16539     uint32_t const        cLabels  = pReNative->cLabels;
    16540     for (uint32_t i = 0; i < cLabels; i++)
    16541         AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
    16542 #endif
    16543 
    16544     /*
    16545      * Allocate executable memory, copy over the code we've generated.
    16546      */
    16547     PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
    16548     if (pTbAllocator->pDelayedFreeHead)
    16549         iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
    16550 
    16551     PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
    16552     AssertReturn(paFinalInstrBuf, pTb);
    16553     memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
    16554 
    16555     /*
    16556      * Apply fixups.
    16557      */
    16558     PIEMNATIVEFIXUP const paFixups   = pReNative->paFixups;
    16559     uint32_t const        cFixups    = pReNative->cFixups;
    16560     for (uint32_t i = 0; i < cFixups; i++)
    16561     {
    16562         Assert(paFixups[i].off < off);
    16563         Assert(paFixups[i].idxLabel < cLabels);
    16564         AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
    16565                   ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
    16566                    paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
    16567         RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
    16568         switch (paFixups[i].enmType)
    16569         {
    16570 #if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    16571             case kIemNativeFixupType_Rel32:
    16572                 Assert(paFixups[i].off + 4 <= off);
    16573                 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    16574                 continue;
    16575 
    16576 #elif defined(RT_ARCH_ARM64)
    16577             case kIemNativeFixupType_RelImm26At0:
    16578             {
    16579                 Assert(paFixups[i].off < off);
    16580                 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    16581                 Assert(offDisp >= -262144 && offDisp < 262144);
    16582                 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
    16583                 continue;
    16584             }
    16585 
    16586             case kIemNativeFixupType_RelImm19At5:
    16587             {
    16588                 Assert(paFixups[i].off < off);
    16589                 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    16590                 Assert(offDisp >= -262144 && offDisp < 262144);
    16591                 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
    16592                 continue;
    16593             }
    16594 
    16595             case kIemNativeFixupType_RelImm14At5:
    16596             {
    16597                 Assert(paFixups[i].off < off);
    16598                 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    16599                 Assert(offDisp >= -8192 && offDisp < 8192);
    16600                 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
    16601                 continue;
    16602             }
    16603 
    16604 #endif
    16605             case kIemNativeFixupType_Invalid:
    16606             case kIemNativeFixupType_End:
    16607                 break;
    16608         }
    16609         AssertFailed();
    16610     }
    16611 
    16612     iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
    16613     STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
    16614 
    16615     /*
    16616      * Convert the translation block.
    16617      */
    16618     RTMemFree(pTb->Thrd.paCalls);
    16619     pTb->Native.paInstructions  = paFinalInstrBuf;
    16620     pTb->Native.cInstructions   = off;
    16621     pTb->fFlags                 = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
    16622 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    16623     pTb->pDbgInfo               = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
    16624                                                       RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
    16625 #endif
    16626 
    16627     Assert(pTbAllocator->cThreadedTbs > 0);
    16628     pTbAllocator->cThreadedTbs -= 1;
    16629     pTbAllocator->cNativeTbs   += 1;
    16630     Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
    16631 
    16632 #ifdef LOG_ENABLED
    16633     /*
    16634      * Disassemble to the log if enabled.
    16635      */
    16636     if (LogIs3Enabled())
    16637     {
    16638         Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
    16639         iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
    16640 # if defined(DEBUG_bird) || defined(DEBUG_aeichner)
    16641         RTLogFlush(NULL);
    16642 # endif
    16643     }
    16644 #endif
    16645     /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
    16646 
    16647     STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
    16648     return pTb;
    16649 }
    16650 
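As an aside, the kIemNativeFixupType_RelImm26At0 case in the fixup loop above rewrites the 26-bit immediate of an ARM64 branch in place. A hedged, self-contained sketch of that patch arithmetic; the instruction indices are invented and only the masking mirrors the code above:

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    uint32_t aInstr[128] = { 0 };
    uint32_t const offFixup = 8;    /* hypothetical: 'b <label>' sits at instruction 8 */
    uint32_t const offLabel = 100;  /* hypothetical: the label resolves to instruction 100 */

    aInstr[offFixup] = UINT32_C(0x14000000);   /* ARM64 'B' opcode with a zero imm26 */

    /* Same arithmetic as the fixup case: displacement in 32-bit instruction
       units, truncated to 26 bits at bit position 0. */
    int32_t const offDisp = (int32_t)(offLabel - offFixup);
    aInstr[offFixup] = (aInstr[offFixup] & UINT32_C(0xfc000000))
                     | ((uint32_t)offDisp & UINT32_C(0x03ffffff));

    printf("patched: %#010" PRIx32 "\n", aInstr[offFixup]); /* 0x1400005c = b +0x170 */
    return 0;
}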
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp

    r103805 r103807  
    130130*   Internal Functions                                                                                                           *
    131131*********************************************************************************************************************************/
    132 #ifdef VBOX_STRICT
    133 static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    134                                                 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
    135 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    136 static uint32_t iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
    137                                                     IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz);
    138 # endif
    139 static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
    140 #endif
    141132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    142 static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
    143133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
    144134#endif
     
    34073397 * Debug Info: Adds a native offset record, if necessary.
    34083398 */
    3409 static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
     3399DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    34103400{
    34113401    PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
     
    34763466 * Debug Info: Record info about guest register shadowing.
    34773467 */
    3478 static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
    3479                                                  uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
     3468DECL_HIDDEN_THROW(void)
     3469iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
     3470                                     uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
    34803471{
    34813472    PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
     
    34923483 * Debug Info: Record info about guest register shadowing.
    34933484 */
    3494 static void iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
    3495                                                      uint8_t idxHstSimdReg = UINT8_MAX, uint8_t idxHstSimdRegPrev = UINT8_MAX)
     3485DECL_HIDDEN_THROW(void)
     3486iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
     3487                                         uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
    34963488{
    34973489    PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
     
    35093501 * Debug Info: Record info about delayed RIP updates.
    35103502 */
    3511 static void iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
     3503DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
    35123504{
    35133505    PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
     
    35473539#endif
    35483540};
     3541AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
    35493542
    35503543/**
     
    36063599 * @see IEMNATIVEGSTREG
    36073600 */
    3608 static struct
    3609 {
    3610     /** Offset in VMCPU. */
    3611     uint32_t    off;
    3612     /** The field size. */
    3613     uint8_t     cb;
    3614     /** Name (for logging). */
    3615     const char *pszName;
    3616 } const g_aGstShadowInfo[] =
     3601DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
    36173602{
    36183603#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
     
    36843669
    36853670
    3686 DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
    3687                                                      IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
    3688 {
    3689     pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
    3690 
    3691     pReNative->Core.aHstRegs[idxReg].enmWhat        = enmWhat;
    3692     pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3693     pReNative->Core.aHstRegs[idxReg].idxVar         = idxVar;
    3694     return (uint8_t)idxReg;
    3695 }
    3696 
    3697 
    36983671#if 0 /* unused */
    36993672/**
     
    39443917 *                          call-volatile registers.
    39453918 */
    3946 static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
    3947                                                 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
     3919DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
     3920                                                            uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
    39483921{
    39493922    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
     
    41264099    *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
    41274100    return idxReg;
    4128 }
    4129 
    4130 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    4131 
    4132 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    4133 /**
    4134  * Helper for iemNativeLivenessGetStateByGstReg.
    4135  *
    4136  * @returns IEMLIVENESS_STATE_XXX
    4137  * @param   fMergedStateExp2    This is the RT_BIT_32() of each sub-state
    4138  *                              ORed together.
    4139  */
    4140 DECL_FORCE_INLINE(uint32_t)
    4141 iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
    4142 {
    4143     /* INPUT trumps anything else. */
    4144     if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
    4145         return IEMLIVENESS_STATE_INPUT;
    4146 
    4147     /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
    4148     if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
    4149     {
    4150         /* If not all sub-fields are clobbered they must be considered INPUT. */
    4151         if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
    4152             return IEMLIVENESS_STATE_INPUT;
    4153         return IEMLIVENESS_STATE_CLOBBERED;
    4154     }
    4155 
    4156     /* XCPT_OR_CALL trumps UNUSED. */
    4157     if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
    4158         return IEMLIVENESS_STATE_XCPT_OR_CALL;
    4159 
    4160     return IEMLIVENESS_STATE_UNUSED;
    4161 }
    4162 # endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
    4163 
    4164 
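/**
 * Extracts the IEMLIVENESS_STATE_XXX value for the guest register with
 * (extended) index @a enmGstRegEx by gathering one bit from each of the
 * parallel liveness bitmaps (two in the compact layout, four in the
 * extended one).
 */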
    4165 DECL_FORCE_INLINE(uint32_t)
    4166 iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
    4167 {
    4168 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    4169     return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
    4170          | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
    4171 # else
    4172     return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx)       & 1)
    4173          | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
    4174          | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
    4175          | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 2) & 8);
    4176 # endif
    4177 }
    4178 
    4179 
    4180 DECL_FORCE_INLINE(uint32_t)
    4181 iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
    4182 {
    4183     uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
    4184     if (enmGstReg == kIemNativeGstReg_EFlags)
    4185     {
    4186         /* Merge the eflags states to one. */
    4187 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    4188         uRet  = RT_BIT_32(uRet);
    4189         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
    4190         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
    4191         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
    4192         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
    4193         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
    4194         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
    4195         uRet  = iemNativeLivenessMergeExpandedEFlagsState(uRet);
    4196 # else
    4197         AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
    4198         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
    4199         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
    4200         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
    4201         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
    4202         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
    4203         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
    4204 # endif
    4205     }
    4206     return uRet;
    4207 }
    4208 
    4209 
    4210 # ifdef VBOX_STRICT
     4211 /** For assertions only; the caller checks that idxCurCall isn't zero. */
    4212 DECL_FORCE_INLINE(uint32_t)
    4213 iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
    4214 {
    4215     return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
    4216 }
    4217 # endif /* VBOX_STRICT */
    4218 
    4219 #endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
    4220 
    4221 /**
    4222  * Marks host register @a idxHstReg as containing a shadow copy of guest
    4223  * register @a enmGstReg.
    4224  *
    4225  * ASSUMES that caller has made sure @a enmGstReg is not associated with any
    4226  * host register before calling.
    4227  */
    4228 DECL_FORCE_INLINE(void)
    4229 iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
    4230 {
    4231     Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
    4232     Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    4233     Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
    4234 
    4235     pReNative->Core.aidxGstRegShadows[enmGstReg]       = idxHstReg;
    4236     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
    4237     pReNative->Core.bmGstRegShadows                   |= RT_BIT_64(enmGstReg);
    4238     pReNative->Core.bmHstRegsWithGstShadow            |= RT_BIT_32(idxHstReg);
    4239 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4240     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    4241     iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
    4242 #else
    4243     RT_NOREF(off);
    4244 #endif
    4245 }
    4246 
    4247 
    4248 /**
    4249  * Clear any guest register shadow claims from @a idxHstReg.
    4250  *
    4251  * The register does not need to be shadowing any guest registers.
    4252  */
    4253 DECL_FORCE_INLINE(void)
    4254 iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
    4255 {
    4256     Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    4257               == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
    4258            && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4259     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
    4260            == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    4261 
    4262 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4263     uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4264     if (fGstRegs)
    4265     {
    4266         Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
    4267         iemNativeDbgInfoAddNativeOffset(pReNative, off);
    4268         while (fGstRegs)
    4269         {
    4270             unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    4271             fGstRegs &= ~RT_BIT_64(iGstReg);
    4272             iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
    4273         }
    4274     }
    4275 #else
    4276     RT_NOREF(off);
    4277 #endif
    4278 
    4279     pReNative->Core.bmHstRegsWithGstShadow            &= ~RT_BIT_32(idxHstReg);
    4280     pReNative->Core.bmGstRegShadows                   &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4281     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4282 }
    4283 
    4284 
    4285 /**
    4286  * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
    4287  * and global overview flags.
    4288  */
    4289 DECL_FORCE_INLINE(void)
    4290 iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
    4291 {
    4292     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4293     Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    4294               == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
    4295            && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4296     Assert(pReNative->Core.bmGstRegShadows                    & RT_BIT_64(enmGstReg));
    4297     Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
    4298     Assert(pReNative->Core.bmHstRegsWithGstShadow             & RT_BIT_32(idxHstReg));
    4299 
    4300 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4301     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    4302     iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
    4303 #else
    4304     RT_NOREF(off);
    4305 #endif
    4306 
    4307     uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
    4308     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
    4309     if (!fGstRegShadowsNew)
    4310         pReNative->Core.bmHstRegsWithGstShadow        &= ~RT_BIT_32(idxHstReg);
    4311     pReNative->Core.bmGstRegShadows                   &= ~RT_BIT_64(enmGstReg);
    4312 }
    4313 
    4314 
    4315 #if 0 /* unused */
    4316 /**
    4317  * Clear any guest register shadow claim for @a enmGstReg.
    4318  */
    4319 DECL_FORCE_INLINE(void)
    4320 iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
    4321 {
    4322     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4323     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    4324     {
    4325         Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4326         iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
    4327     }
    4328 }
    4329 #endif
    4330 
    4331 
    4332 /**
    4333  * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
    4334  * as the new shadow of it.
    4335  *
    4336  * Unlike the other guest reg shadow helpers, this does the logging for you.
     4337  * However, the liveness state is not asserted here; the caller must do
    4338  * that.
    4339  */
    4340 DECL_FORCE_INLINE(void)
    4341 iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
    4342                                        IEMNATIVEGSTREG enmGstReg, uint32_t off)
    4343 {
    4344     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4345     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    4346     {
    4347         uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
    4348         Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4349         if (idxHstRegOld == idxHstRegNew)
    4350             return;
    4351         Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
    4352                g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
    4353         iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
    4354     }
    4355     else
    4356         Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
    4357                g_aGstShadowInfo[enmGstReg].pszName));
    4358     iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
    4359 }
    4360 
    4361 
    4362 /**
    4363  * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
    4364  * to @a idxRegTo.
    4365  */
    4366 DECL_FORCE_INLINE(void)
    4367 iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
    4368                                     IEMNATIVEGSTREG enmGstReg, uint32_t off)
    4369 {
    4370     Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
    4371     Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
    4372     Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
    4373               == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
    4374            && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4375     Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
    4376            == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
    4377     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
    4378            == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
    4379 
    4380     uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
    4381     pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows  = fGstRegShadowsFrom;
    4382     if (!fGstRegShadowsFrom)
    4383         pReNative->Core.bmHstRegsWithGstShadow          &= ~RT_BIT_32(idxRegFrom);
    4384     pReNative->Core.bmHstRegsWithGstShadow              |= RT_BIT_32(idxRegTo);
    4385     pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows   |= RT_BIT_64(enmGstReg);
    4386     pReNative->Core.aidxGstRegShadows[enmGstReg]         = idxRegTo;
    4387 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4388     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    4389     iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
    4390 #else
    4391     RT_NOREF(off);
    4392 #endif
    43934101}
    43944102
     
    51434851
    51444852
    5145 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    5146 # if defined(IEMNATIVE_REG_FIXED_PC_DBG)
    5147 static uint32_t iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    5148 {
    5149     /* Compare the shadow with the context value, they should match. */
    5150     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
    5151     off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
    5152     off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
    5153     return off;
    5154 }
    5155 # endif
    5156 
    5157 /**
    5158  * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
    5159  */
    5160 static uint32_t
    5161 iemNativeEmitPcWriteback(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    5162 {
    5163     if (pReNative->Core.offPc)
    5164     {
    5165 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    5166         iemNativeDbgInfoAddNativeOffset(pReNative, off);
    5167         iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
    5168 # endif
    5169 
    5170 # ifndef IEMNATIVE_REG_FIXED_PC_DBG
    5171         /* Allocate a temporary PC register. */
    5172         uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    5173 
    5174         /* Perform the addition and store the result. */
    5175         off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
    5176         off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    5177 
    5178         /* Free but don't flush the PC register. */
    5179         iemNativeRegFreeTmp(pReNative, idxPcReg);
    5180 # else
    5181         /* Compare the shadow with the context value, they should match. */
    5182         off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
    5183         off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
    5184 # endif
    5185 
    5186         STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
    5187         pReNative->Core.offPc                 = 0;
    5188         pReNative->Core.cInstrPcUpdateSkipped = 0;
    5189     }
    5190 # if 0 /*def IEMNATIVE_WITH_TB_DEBUG_INFO*/
    5191     else
    5192     {
    5193         iemNativeDbgInfoAddNativeOffset(pReNative, off);
    5194         iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc);
    5195     }
    5196 # endif
    5197 
    5198     return off;
    5199 }
    5200 #endif
    5201 
    5202 
    5203 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    52044853
    52054854
     
    52074856*   SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge)                            *
    52084857*********************************************************************************************************************************/
     4858#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    52094859
    52104860/**
     
    52614911
    52624912
    5263 DECL_FORCE_INLINE(uint8_t) iemNativeSimdRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdReg,
    5264                                                          IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
    5265 {
    5266     pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
    5267 
    5268     pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat        = enmWhat;
    5269     pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
    5270     RT_NOREF(idxVar);
    5271     return idxSimdReg;
    5272 }
    5273 
    5274 
    52754913/**
    52764914 * Frees a temporary SIMD register.
     
    52974935 * @param   enmGstSimdReg   The guest SIMD register to flush.
    52984936 */
    5299 static uint32_t iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
     4937DECL_HIDDEN_THROW(uint32_t)
     4938iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
    53004939{
    53014940    uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
     
    54865125    AssertFailed();
    54875126    return UINT8_MAX;
    5488 }
    5489 
    5490 
    5491 /**
    5492  * Marks host SIMD register @a idxHstSimdReg as containing a shadow copy of guest
    5493  * SIMD register @a enmGstSimdReg.
    5494  *
    5495  * ASSUMES that caller has made sure @a enmGstSimdReg is not associated with any
    5496  * host register before calling.
    5497  */
    5498 DECL_FORCE_INLINE(void)
    5499 iemNativeSimdRegMarkAsGstSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
    5500 {
    5501     Assert(!(pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg)));
    5502     Assert(!pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
    5503     Assert((unsigned)enmGstSimdReg < (unsigned)kIemNativeGstSimdReg_End);
    5504 
    5505     pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg]       = idxHstSimdReg;
    5506     pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
    5507     pReNative->Core.bmGstSimdRegShadows                        |= RT_BIT_64(enmGstSimdReg);
    5508     pReNative->Core.bmHstSimdRegsWithGstShadow                 |= RT_BIT_32(idxHstSimdReg);
    5509 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    5510     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    5511     iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxHstSimdReg);
    5512 #else
    5513     RT_NOREF(off);
    5514 #endif
    5515 }
    5516 
    5517 
    5518 /**
    5519  * Transfers the guest SIMD register shadow claims of @a enmGstSimdReg from @a idxSimdRegFrom
    5520  * to @a idxSimdRegTo.
    5521  */
    5522 DECL_FORCE_INLINE(void)
    5523 iemNativeSimdRegTransferGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdRegFrom, uint8_t idxSimdRegTo,
    5524                                             IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
    5525 {
    5526     Assert(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
    5527     Assert(pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] == idxSimdRegFrom);
    5528     Assert(      (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows)
    5529               == pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows
    5530            && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    5531     Assert(   (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows)
    5532            == pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows);
    5533     Assert(   RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdRegFrom))
    5534            == RT_BOOL(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows));
    5535     Assert(   pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded
    5536            == pReNative->Core.aHstSimdRegs[idxSimdRegTo].enmLoaded);
    5537 
    5538 
    5539     uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstSimdReg);
    5540     pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows  = fGstRegShadowsFrom;
    5541     if (!fGstRegShadowsFrom)
    5542     {
    5543         pReNative->Core.bmHstSimdRegsWithGstShadow               &= ~RT_BIT_32(idxSimdRegFrom);
    5544         pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded    = kIemNativeGstSimdRegLdStSz_Invalid;
    5545     }
    5546     pReNative->Core.bmHstSimdRegsWithGstShadow                |= RT_BIT_32(idxSimdRegTo);
    5547     pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
    5548     pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg]       = idxSimdRegTo;
    5549 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    5550     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    5551     iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxSimdRegTo, idxSimdRegFrom);
    5552 #else
    5553     RT_NOREF(off);
    5554 #endif
    5555 }
    5556 
    5557 
    5558 /**
    5559  * Clear any guest register shadow claims from @a idxHstSimdReg.
    5560  *
    5561  * The register does not need to be shadowing any guest registers.
    5562  */
    5563 DECL_FORCE_INLINE(void)
    5564 iemNativeSimdRegClearGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, uint32_t off)
    5565 {
    5566     Assert(      (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
    5567               == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows
    5568            && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
    5569     Assert(   RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstSimdReg))
    5570            == RT_BOOL(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
    5571     Assert(   !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyLo128)
    5572            && !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyHi128));
    5573 
    5574 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    5575     uint64_t fGstRegs = pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
    5576     if (fGstRegs)
    5577     {
    5578         Assert(fGstRegs < RT_BIT_64(kIemNativeGstSimdReg_End));
    5579         iemNativeDbgInfoAddNativeOffset(pReNative, off);
    5580         while (fGstRegs)
    5581         {
    5582             unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    5583             fGstRegs &= ~RT_BIT_64(iGstReg);
    5584             iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, (IEMNATIVEGSTSIMDREG)iGstReg, UINT8_MAX, idxHstSimdReg);
    5585         }
    5586     }
    5587 #else
    5588     RT_NOREF(off);
    5589 #endif
    5590 
    5591     pReNative->Core.bmHstSimdRegsWithGstShadow        &= ~RT_BIT_32(idxHstSimdReg);
    5592     pReNative->Core.bmGstSimdRegShadows               &= ~pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
    5593     pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
    5594     pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded      = kIemNativeGstSimdRegLdStSz_Invalid;
    55955127}
    55965128
     
    57755307 * @param   enmLoadSz       The load size to set.
    57765308 */
    5777 DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
     5309DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
     5310                                                         IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
    57785311{
    57795312    /* Everything valid already? -> nothing to do. */
     
    60375570*********************************************************************************************************************************/
    60385571
    6039 /**
    6040  * Flushes delayed write of a specific guest register.
    6041  *
    6042  * This must be called prior to calling CImpl functions and any helpers that use
    6043  * the guest state (like raising exceptions) and such.
    6044  *
    6045  * This optimization has not yet been implemented.  The first target would be
    6046  * RIP updates, since these are the most common ones.
    6047  */
    6048 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    6049                                                                   IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
    6050 {
    6051 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    6052     /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
    6053 #endif
    6054 
    6055 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    6056     if (   enmClass == kIemNativeGstRegRef_XReg
    6057         && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
    6058     {
    6059         off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
    6060         /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now, that the referenced register doesn't change). */
    6061         uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
    6062 
    6063         iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
    6064         iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
    6065     }
    6066 #endif
    6067     RT_NOREF(pReNative, enmClass, idxReg);
    6068     return off;
    6069 }
    6070 
    6071 
    6072 /**
    6073  * Flushes any delayed guest register writes.
    6074  *
    6075  * This must be called prior to calling CImpl functions and any helpers that use
    6076  * the guest state (like raising exceptions) and such.
    6077  *
    6078  * This optimization has not yet been implemented.  The first target would be
    6079  * RIP updates, since these are the most common ones.
    6080  */
    6081 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/,
    6082                                                            bool fFlushShadows /*= true*/)
    6083 {
    6084 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    6085     if (!(fGstShwExcept & kIemNativeGstReg_Pc))
    6086         off = iemNativeEmitPcWriteback(pReNative, off);
    6087 #else
    6088     RT_NOREF(pReNative, fGstShwExcept);
    6089 #endif
    6090 
    6091 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    6092     /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
    6093     for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
    6094     {
    6095         Assert(   (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
    6096                || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
    6097 
    6098         if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
    6099             off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
    6100 
    6101         if (   fFlushShadows
    6102             && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
    6103         {
    6104             uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
    6105 
    6106             iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
    6107             iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
    6108         }
    6109     }
    6110 #else
    6111     RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
    6112 #endif
    6113 
    6114     return off;
    6115 }
    6116 
    6117 
    61185572#ifdef VBOX_STRICT
    61195573/**
    61205574 * Does internal register allocator sanity checks.
    61215575 */
    6122 static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
     5576DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
    61235577{
    61245578    /*
     
    61745628               bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
    61755629}
    6176 #endif
     5630#endif /* VBOX_STRICT */
     5631
     5632
     5633/**
     5634 * Flushes any delayed guest register writes.
     5635 *
     5636 * This must be called prior to calling CImpl functions and any helpers that use
     5637 * the guest state (like raising exceptions) and such.
     5638 *
     5639 * This optimization has not yet been implemented.  The first target would be
     5640 * RIP updates, since these are the most common ones.
     5641 */
     5642DECL_HIDDEN_THROW(uint32_t)
     5643iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
     5644{
     5645#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
      5646    if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
     5647        off = iemNativeEmitPcWriteback(pReNative, off);
     5648#else
     5649    RT_NOREF(pReNative, fGstShwExcept);
     5650#endif
     5651
     5652#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     5653    /** @todo r=bird: There must be a quicker way to check if anything needs
     5654     *        doing and then call the SIMD function to do the flushing. */
     5655    /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
     5656    for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
     5657    {
     5658        Assert(   (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
     5659               || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
     5660
     5661        if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
     5662            off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
     5663
     5664        if (   fFlushShadows
     5665            && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
     5666        {
     5667            uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
     5668
     5669            iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
     5670            iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
     5671        }
     5672    }
     5673#else
     5674    RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
     5675#endif
     5676
     5677    return off;
     5678}
     5679
     5680
     5681#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
     5682/**
     5683 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
     5684 */
     5685DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
     5686{
     5687    Assert(pReNative->Core.offPc);
     5688# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
     5689    iemNativeDbgInfoAddNativeOffset(pReNative, off);
     5690    iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
     5691# endif
     5692
     5693# ifndef IEMNATIVE_REG_FIXED_PC_DBG
     5694    /* Allocate a temporary PC register. */
     5695    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
     5696
     5697    /* Perform the addition and store the result. */
     5698    off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
     5699    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
     5700
     5701    /* Free but don't flush the PC register. */
     5702    iemNativeRegFreeTmp(pReNative, idxPcReg);
     5703# else
     5704    /* Compare the shadow with the context value, they should match. */
     5705    off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
     5706    off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
     5707# endif
     5708
     5709    STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
     5710    pReNative->Core.offPc                 = 0;
     5711    pReNative->Core.cInstrPcUpdateSkipped = 0;
     5712
     5713    return off;
     5714}
     5715#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
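To make the delayed-PC mechanism above concrete, here is a minimal stand-alone sketch of the bookkeeping it implements; the struct and function names are invented for illustration and are not part of the source:

    /* Sketch of delayed PC updating: accumulate instruction lengths and emit a
       single RIP add+store at flush points instead of one store per instruction. */
    typedef struct PCSKETCH
    {
        uint64_t uRipInCtx; /* what CPUMCTX.rip currently holds */
        uint64_t offPc;     /* bytes advanced since the last writeback */
    } PCSKETCH;

    static void pcSketchAdvance(PCSKETCH *pThis, uint8_t cbInstr)
    {
        pThis->offPc += cbInstr;   /* no store emitted here - this is the saving */
    }

    static void pcSketchWriteback(PCSKETCH *pThis)
    {
        if (pThis->offPc)          /* one add+store flushes all skipped updates */
        {
            pThis->uRipInCtx += pThis->offPc;
            pThis->offPc      = 0;
        }
    }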
    61775716
    61785717
     
    61975736iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
    61985737{
    6199     Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
     5738    Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
    62005739    Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
    62015740
     
    62525791    }
    62535792}
    6254 #endif
     5793#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
    62555794
    62565795#ifdef VBOX_STRICT
     5796
    62575797/**
    62585798 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
     
    62615801 *       Trashes EFLAGS on AMD64.
    62625802 */
    6263 static uint32_t
     5803DECL_HIDDEN_THROW(uint32_t)
    62645804iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
    62655805{
     
    63115851    return off;
    63125852}
    6313 #endif /* VBOX_STRICT */
    6314 
    6315 
    6316 #ifdef VBOX_STRICT
     5853
     5854
    63175855/**
    63185856 * Emitting code that checks that the content of register @a idxReg is the same
     
    63235861 *       Trashes EFLAGS on AMD64.
    63245862 */
    6325 static uint32_t
     5863DECL_HIDDEN_THROW(uint32_t)
    63265864iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
    63275865{
     
    64185956 *       Trashes EFLAGS on AMD64.
    64195957 */
    6420 static uint32_t
    6421 iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg,
    6422                                     IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
     5958DECL_HIDDEN_THROW(uint32_t)
     5959iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
     5960                                    IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
    64235961{
    64245962    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
     
    65466084    return off;
    65476085}
    6548 # endif
    6549 #endif /* VBOX_STRICT */
    6550 
    6551 
    6552 #ifdef VBOX_STRICT
     6086# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
     6087
     6088
    65536089/**
    65546090 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
     
    65586094 *       Trashes EFLAGS on AMD64.
    65596095 */
    6560 static uint32_t
     6096DECL_HIDDEN_THROW(uint32_t)
    65616097iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
    65626098{
     
    65926128    return off;
    65936129}
     6130
    65946131#endif /* VBOX_STRICT */
    6595 
    65966132
    65976133/**
     
    67536289    iemNativeRegFreeTmp(pReNative, idxRegCsLim);
    67546290    return off;
    6755 }
    6756 
    6757 
    6758 /**
    6759  * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
    6760  *
    6761  * @returns The flush mask.
    6762  * @param   fCImpl          The IEM_CIMPL_F_XXX flags.
    6763  * @param   fGstShwFlush    The starting flush mask.
    6764  */
    6765 DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
    6766 {
    6767     if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
    6768         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_CS)
    6769                      |  RT_BIT_64(kIemNativeGstReg_SegBaseFirst  + X86_SREG_CS)
    6770                      |  RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
    6771     if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
    6772         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
    6773                      |  RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_SS)
    6774                      |  RT_BIT_64(kIemNativeGstReg_SegBaseFirst  + X86_SREG_SS)
    6775                      |  RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
    6776     else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
    6777         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
    6778     if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
    6779         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
    6780     return fGstShwFlush;
    67816291}
    67826292
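As a usage illustration of the flush mask helper above (a hypothetical call site, not taken from this changeset): a far branch that also modifies the status flags would widen the mask like so:

    /* Hypothetical call site: start from a PC-only mask and let the helper add
       the CS selector/base/limit and EFLAGS shadows for a far branch. */
    uint64_t const fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_STATUS_FLAGS,
                                                                            RT_BIT_64(kIemNativeGstReg_Pc));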
     
    74246934
    74256935
    7426 
    7427 
    7428 /*********************************************************************************************************************************
    7429 *   Emitters for IEM_MC_BEGIN and IEM_MC_END.                                                                                    *
    7430 *********************************************************************************************************************************/
    7431 
    7432 #define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
    7433     { \
    7434         Assert(pReNative->Core.bmVars     == 0); \
    7435         Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
    7436         Assert(pReNative->Core.bmStack    == 0); \
    7437         pReNative->fMc    = (a_fMcFlags); \
    7438         pReNative->fCImpl = (a_fCImplFlags); \
    7439         pReNative->cArgs  = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
    7440 
    7441 /** We have to get to the end in recompilation mode, as otherwise we won't
    7442  * generate code for all the IEM_MC_IF_XXX branches. */
    7443 #define IEM_MC_END() \
    7444         iemNativeVarFreeAll(pReNative); \
    7445     } return off
    7446 
    7447 
    7448 
    7449 /*********************************************************************************************************************************
    7450 *   Native Emitter Support.                                                                                                      *
    7451 *********************************************************************************************************************************/
    7452 
    7453 
    7454 #define IEM_MC_NATIVE_IF(a_fSupportedHosts)     if (RT_ARCH_VAL & (a_fSupportedHosts)) {
    7455 
    7456 #define IEM_MC_NATIVE_ELSE()                    } else {
    7457 
    7458 #define IEM_MC_NATIVE_ENDIF()                   } ((void)0)
    7459 
    7460 
    7461 #define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
    7462     off = a_fnEmitter(pReNative, off)
    7463 
    7464 #define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
    7465     off = a_fnEmitter(pReNative, off, (a0))
    7466 
    7467 #define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
    7468     off = a_fnEmitter(pReNative, off, (a0), (a1))
    7469 
    7470 #define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
    7471     off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
    7472 
    7473 #define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
    7474     off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
    7475 
    7476 #define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
    7477     off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
    7478 
    7479 #define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
    7480     off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
    7481 
    7482 #define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
    7483     off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
    7484 
    7485 #define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
    7486     off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
    7487 
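A hedged usage sketch of the IEM_MC_NATIVE_XXX macros above (the emitter name iemNativeEmit_add_r64_r64 and the register variables are invented for illustration):

    /* Pick a native emitter on AMD64/ARM64 hosts, otherwise fall back to the
       generic IEM_MC_* sequence in the else branch. */
    IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64) {
        IEM_MC_NATIVE_EMIT_2(iemNativeEmit_add_r64_r64, idxRegDst, idxRegSrc); /* hypothetical emitter */
    } IEM_MC_NATIVE_ELSE() {
        /* ... generic IEM_MC_* statements for other hosts ... */
    } IEM_MC_NATIVE_ENDIF();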
    7488 
    7489 
    7490 /*********************************************************************************************************************************
    7491 *   Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX)                                               *
    7492 *********************************************************************************************************************************/
    7493 
    7494 #define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
    7495     pReNative->fMc    = 0; \
    7496     pReNative->fCImpl = (a_fFlags); \
    7497     return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
    7498 
    7499 
    7500 #define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
    7501     pReNative->fMc    = 0; \
    7502     pReNative->fCImpl = (a_fFlags); \
    7503     return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
    7504 
    7505 DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    7506                                                     uint8_t idxInstr, uint64_t a_fGstShwFlush,
    7507                                                     uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
    7508 {
    7509     return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
    7510 }
    7511 
    7512 
    7513 #define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
    7514     pReNative->fMc    = 0; \
    7515     pReNative->fCImpl = (a_fFlags); \
    7516     return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
    7517                                    (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
    7518 
    7519 DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    7520                                                     uint8_t idxInstr, uint64_t a_fGstShwFlush,
    7521                                                     uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
    7522 {
    7523     return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
    7524 }
    7525 
    7526 
    7527 #define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
    7528     pReNative->fMc    = 0; \
    7529     pReNative->fCImpl = (a_fFlags); \
    7530     return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
    7531                                    (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
    7532 
    7533 DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    7534                                                     uint8_t idxInstr, uint64_t a_fGstShwFlush,
    7535                                                     uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
    7536                                                     uint64_t uArg2)
    7537 {
    7538     return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
    7539 }
    7540 
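For illustration, a threaded function body defers a one-argument instruction to its C implementation roughly as follows (the CImpl helper name and operand are invented; real call sites are generated from the instruction tables):

    /* Hypothetical deferral site: 2-byte instruction, status-flag effects, one argument. */
    IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(2 /*cbInstr*/, IEM_CIMPL_F_STATUS_FLAGS, 0 /*fGstShwFlush*/,
                                         iemCImpl_SomeHelper, u64Arg0);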
    7541 
    7542 
    7543 /*********************************************************************************************************************************
    7544 *   Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX)                                                     *
    7545 *********************************************************************************************************************************/
    7546 
    7547 /** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
    7548  *  and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
    7549 DECL_INLINE_THROW(uint32_t)
    7550 iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    7551 {
    7552     /*
    7553      * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
    7554      * return with a special status code and make the execution loop deal with
    7555      * this.  If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
    7556      * exception and won't continue execution.  While CPUMCTX_DBG_DBGF_MASK
    7557      * could continue w/o interruption, it probably will drop into the
    7558      * debugger, so it is not worth the effort of trying to service it here and
    7559      * we just lump it in with the handling of the others.
    7560      *
    7561      * To simplify the code and the register state management even more (wrt
    7562      * the immediate in the AND operation), we always update the flags and skip
    7563      * the conditional jump associated with the extra check.
    7564      */
    7565     AssertCompile(   (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
    7566                   <= UINT32_MAX);
    7567 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    7568     AssertMsg(   pReNative->idxCurCall == 0
    7569               || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
    7570               ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
    7571 #endif
    7572 
    7573     /*
    7574      * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
    7575      * any pending register writes must be flushed.
    7576      */
    7577     off = iemNativeRegFlushPendingWrites(pReNative, off);
    7578 
    7579     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    7580                                                               kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
    7581                                                               true /*fSkipLivenessAssert*/);
    7582     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
    7583                                                              X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
    7584                                                              iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
    7585     off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
    7586     off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
    7587 
    7588     /* Free but don't flush the EFLAGS register. */
    7589     iemNativeRegFreeTmp(pReNative, idxEflReg);
    7590 
    7591     return off;
    7592 }
    7593 
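In plain C, the sequence emitted above corresponds roughly to the following (illustrative only; the real code keeps the flags in a host register and jumps to the ReturnWithFlags label):

    uint32_t fEfl = pVCpu->cpum.GstCtx.eflags.u;
    if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
        goto ReturnWithFlags;                               /* leave the TB */
    pVCpu->cpum.GstCtx.eflags.u = fEfl & ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);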
    7594 
    7595 /** Emits the status handling for the finish macros; the VINF_SUCCESS instantiation is a no-op dummy. */
    7596 template<int const a_rcNormal>
    7597 DECL_FORCE_INLINE(uint32_t)
    7598 iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
    7599 {
    7600     AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
    7601     if (a_rcNormal != VINF_SUCCESS)
    7602     {
    7603 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    7604         off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    7605 #else
    7606         RT_NOREF_PV(idxInstr);
    7607 #endif
    7608 
    7609         /* As this code returns from the TB any pending register writes must be flushed. */
    7610         off = iemNativeRegFlushPendingWrites(pReNative, off);
    7611 
    7612         return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
    7613     }
    7614     return off;
    7615 }
    7616 
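Since a_rcNormal is a template argument, the branch above is decided at compile time: the VINF_SUCCESS instantiation expands to nothing, while the VINF_IEM_REEXEC_BREAK one emits the flush and the ReturnBreak jump:

    off = iemNativeEmitFinishInstructionWithStatus<VINF_SUCCESS>(pReNative, off, idxInstr);          /* no code emitted */
    off = iemNativeEmitFinishInstructionWithStatus<VINF_IEM_REEXEC_BREAK>(pReNative, off, idxInstr); /* flush + jump */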
    7617 
    7618 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
    7619     off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
    7620     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7621 
    7622 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
    7623     off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
    7624     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    7625     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7626 
    7627 /** Same as iemRegAddToRip64AndFinishingNoFlags. */
    7628 DECL_INLINE_THROW(uint32_t)
    7629 iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
    7630 {
    7631 #if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
    7632 # if defined(IEMNATIVE_REG_FIXED_PC_DBG)
    7633     if (!pReNative->Core.offPc)
    7634         off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    7635 # endif
    7636 
    7637     /* Allocate a temporary PC register. */
    7638     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    7639 
    7640     /* Perform the addition and store the result. */
    7641     off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
    7642     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    7643 
    7644     /* Free but don't flush the PC register. */
    7645     iemNativeRegFreeTmp(pReNative, idxPcReg);
    7646 #endif
    7647 
    7648 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    7649     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
    7650 
    7651     pReNative->Core.offPc += cbInstr;
    7652 # if defined(IEMNATIVE_REG_FIXED_PC_DBG)
    7653     off = iemNativePcAdjustCheck(pReNative, off);
    7654 # endif
    7655     if (pReNative->cCondDepth)
    7656         off = iemNativeEmitPcWriteback(pReNative, off);
    7657     else
    7658         pReNative->Core.cInstrPcUpdateSkipped++;
    7659 #endif
    7660 
    7661     return off;
    7662 }
    7663 
    7664 
    7665 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
    7666     off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
    7667     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7668 
    7669 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
    7670     off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
    7671     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    7672     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7673 
    7674 /** Same as iemRegAddToEip32AndFinishingNoFlags. */
    7675 DECL_INLINE_THROW(uint32_t)
    7676 iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
    7677 {
    7678 #if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
    7679 # if defined(IEMNATIVE_REG_FIXED_PC_DBG)
    7680     if (!pReNative->Core.offPc)
    7681         off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    7682 # endif
    7683 
    7684     /* Allocate a temporary PC register. */
    7685     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    7686 
    7687     /* Perform the addition and store the result. */
    7688     off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
    7689     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    7690 
    7691     /* Free but don't flush the PC register. */
    7692     iemNativeRegFreeTmp(pReNative, idxPcReg);
    7693 #endif
    7694 
    7695 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    7696     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
    7697 
    7698     pReNative->Core.offPc += cbInstr;
    7699 # if defined(IEMNATIVE_REG_FIXED_PC_DBG)
    7700     off = iemNativePcAdjustCheck(pReNative, off);
    7701 # endif
    7702     if (pReNative->cCondDepth)
    7703         off = iemNativeEmitPcWriteback(pReNative, off);
    7704     else
    7705         pReNative->Core.cInstrPcUpdateSkipped++;
    7706 #endif
    7707 
    7708     return off;
    7709 }
    7710 
    7711 
    7712 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
    7713     off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
    7714     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7715 
    7716 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
    7717     off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
    7718     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    7719     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7720 
    7721 /** Same as iemRegAddToIp16AndFinishingNoFlags. */
    7722 DECL_INLINE_THROW(uint32_t)
    7723 iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
    7724 {
    7725 #if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
    7726 # if defined(IEMNATIVE_REG_FIXED_PC_DBG)
    7727     if (!pReNative->Core.offPc)
    7728         off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    7729 # endif
    7730 
    7731     /* Allocate a temporary PC register. */
    7732     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    7733 
    7734     /* Perform the addition and store the result. */
    7735     off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
    7736     off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    7737     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    7738 
    7739     /* Free but don't flush the PC register. */
    7740     iemNativeRegFreeTmp(pReNative, idxPcReg);
    7741 #endif
    7742 
    7743 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    7744     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
    7745 
    7746     pReNative->Core.offPc += cbInstr;
    7747 # if defined(IEMNATIVE_REG_FIXED_PC_DBG)
    7748     off = iemNativePcAdjustCheck(pReNative, off);
    7749 # endif
    7750     if (pReNative->cCondDepth)
    7751         off = iemNativeEmitPcWriteback(pReNative, off);
    7752     else
    7753         pReNative->Core.cInstrPcUpdateSkipped++;
    7754 #endif
    7755 
    7756     return off;
    7757 }
    7758 
    7759 
    7760 
    7761 /*********************************************************************************************************************************
    7762 *   Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX).                                *
    7763 *********************************************************************************************************************************/
    7764 
    7765 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    7766     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
    7767                                                             (a_enmEffOpSize), pCallEntry->idxInstr); \
    7768     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7769 
    7770 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    7771     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
    7772                                                             (a_enmEffOpSize), pCallEntry->idxInstr); \
    7773     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    7774     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7775 
    7776 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
    7777     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
    7778                                                             IEMMODE_16BIT, pCallEntry->idxInstr); \
    7779     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7780 
    7781 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
    7782     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
    7783                                                             IEMMODE_16BIT, pCallEntry->idxInstr); \
    7784     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    7785     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7786 
    7787 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
    7788     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
    7789                                                             IEMMODE_64BIT, pCallEntry->idxInstr); \
    7790     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7791 
    7792 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
    7793     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
    7794                                                             IEMMODE_64BIT, pCallEntry->idxInstr); \
    7795     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    7796     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7797 
    7798 /** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
    7799  *  iemRegRip64RelativeJumpS16AndFinishNoFlags and
    7800  *  iemRegRip64RelativeJumpS32AndFinishNoFlags. */
    7801 DECL_INLINE_THROW(uint32_t)
    7802 iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
    7803                                                   int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
    7804 {
    7805     Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
    7806 
    7807     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    7808     off = iemNativeRegFlushPendingWrites(pReNative, off);
    7809 
    7810 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    7811     Assert(pReNative->Core.offPc == 0);
    7812 
    7813     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
    7814 #endif
    7815 
    7816     /* Allocate a temporary PC register. */
    7817     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    7818 
    7819     /* Perform the addition. */
    7820     off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
    7821 
    7822     if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
    7823     {
    7824         /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
    7825         off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
    7826     }
    7827     else
    7828     {
    7829         /* Just truncate the result to 16-bit IP. */
    7830         Assert(enmEffOpSize == IEMMODE_16BIT);
    7831         off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    7832     }
    7833     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    7834 
    7835     /* Free but don't flush the PC register. */
    7836     iemNativeRegFreeTmp(pReNative, idxPcReg);
    7837 
    7838     return off;
    7839 }
    7840 
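The canonical check referenced above enforces 48-bit sign extension on the new RIP. A stand-alone sketch of the predicate the emitted code tests (not the emitted instruction sequence itself):

    /* An address is canonical when sign-extending bit 47 reproduces the value. */
    static bool isAddrCanonicalSketch(uint64_t uAddr)
    {
        return (uint64_t)((int64_t)(uAddr << 16) >> 16) == uAddr;
    }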
    7841 
    7842 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    7843     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
    7844                                                             (a_enmEffOpSize), pCallEntry->idxInstr); \
    7845     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7846 
    7847 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    7848     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
    7849                                                             (a_enmEffOpSize), pCallEntry->idxInstr); \
    7850     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    7851     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7852 
    7853 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
    7854     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
    7855                                                             IEMMODE_16BIT, pCallEntry->idxInstr); \
    7856     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7857 
    7858 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
    7859     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
    7860                                                             IEMMODE_16BIT, pCallEntry->idxInstr); \
    7861     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    7862     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7863 
    7864 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
    7865     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
    7866                                                             IEMMODE_32BIT, pCallEntry->idxInstr); \
    7867     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7868 
    7869 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
    7870     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
    7871                                                             IEMMODE_32BIT, pCallEntry->idxInstr); \
    7872     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    7873     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7874 
    7875 /** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
    7876  *  iemRegEip32RelativeJumpS16AndFinishNoFlags and
    7877  *  iemRegEip32RelativeJumpS32AndFinishNoFlags. */
    7878 DECL_INLINE_THROW(uint32_t)
    7879 iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
    7880                                                   int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
    7881 {
    7882     Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
    7883 
    7884     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    7885     off = iemNativeRegFlushPendingWrites(pReNative, off);
    7886 
    7887 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    7888     Assert(pReNative->Core.offPc == 0);
    7889 
    7890     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
    7891 #endif
    7892 
    7893     /* Allocate a temporary PC register. */
    7894     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    7895 
    7896     /* Perform the addition. */
    7897     off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
    7898 
    7899     /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
    7900     if (enmEffOpSize == IEMMODE_16BIT)
    7901         off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    7902 
    7903     /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
    7904 /** @todo we can skip this in 32-bit FLAT mode. */
    7905     off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
    7906 
    7907     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    7908 
    7909     /* Free but don't flush the PC register. */
    7910     iemNativeRegFreeTmp(pReNative, idxPcReg);
    7911 
    7912     return off;
    7913 }
    7914 
    7915 
    7916 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
    7917     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
    7918     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7919 
    7920 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
    7921     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
    7922     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    7923     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7924 
    7925 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
    7926     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
    7927     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7928 
    7929 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
    7930     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
    7931     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    7932     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7933 
    7934 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
    7935     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
    7936     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7937 
    7938 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
    7939     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
    7940     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    7941     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    7942 
    7943 /** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
    7944 DECL_INLINE_THROW(uint32_t)
    7945 iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    7946                                                  uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
    7947 {
    7948     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    7949     off = iemNativeRegFlushPendingWrites(pReNative, off);
    7950 
    7951 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    7952     Assert(pReNative->Core.offPc == 0);
    7953 
    7954     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
    7955 #endif
    7956 
    7957     /* Allocate a temporary PC register. */
    7958     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    7959 
    7960     /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
    7961     off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
    7962     off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    7963     off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
    7964     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    7965 
    7966     /* Free but don't flush the PC register. */
    7967     iemNativeRegFreeTmp(pReNative, idxPcReg);
    7968 
    7969     return off;
    7970 }
    7971 
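For the 16-bit variant, the add + clear-upper-bits pair implements modulo-64K IP arithmetic before the CS limit check; in plain C (uOldPc, offDisp and cbInstr stand in for the values held in the registers above):

    /* IP wraps at 64K: e.g. 0xfff0 + 0x20 -> 0x0010, then checked against CS.lim. */
    uint32_t const uNewIp = (uint16_t)(uOldPc + offDisp + cbInstr);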
    7972 
    7973 
    7974 /*********************************************************************************************************************************
    7975 *   Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                   *
    7976 *********************************************************************************************************************************/
    7977 
    7978 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
    7979 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
    7980     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
    7981 
    7982 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
    7983 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
    7984     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
    7985 
    7986 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
    7987 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
    7988     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP),  true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
    7989 
    7990 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
    7991  *  clears flags. */
    7992 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
    7993     IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
    7994     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    7995 
    7996 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
    7997  *  clears flags. */
    7998 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
    7999     IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
    8000     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    8001 
    8002 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
    8003  *  clears flags. */
    8004 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
    8005     IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
    8006     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    8007 
    8008 #undef IEM_MC_SET_RIP_U16_AND_FINISH
    8009 
    8010 
    8011 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
    8012 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
    8013     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
    8014 
    8015 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
    8016 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
    8017     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP),  true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
    8018 
    8019 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
    8020  *  clears flags. */
    8021 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
    8022     IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
    8023     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    8024 
    8025 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
    8026  *  and clears flags. */
    8027 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
    8028     IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
    8029     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    8030 
    8031 #undef IEM_MC_SET_RIP_U32_AND_FINISH
    8032 
    8033 
    8034 /** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
    8035 #define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
    8036     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP),  true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
    8037 
    8038 /** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
    8039  *  and clears flags. */
    8040 #define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
    8041     IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
    8042     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    8043 
    8044 #undef IEM_MC_SET_RIP_U64_AND_FINISH
    8045 
    8046 
    8047 /** Same as iemRegRipJumpU16AndFinishNoFlags,
    8048  *  iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
    8049 DECL_INLINE_THROW(uint32_t)
    8050 iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
    8051                             uint8_t idxInstr, uint8_t cbVar)
    8052 {
    8053     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
    8054     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
    8055 
    8056     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    8057     off = iemNativeRegFlushPendingWrites(pReNative, off);
    8058 
    8059 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    8060     Assert(pReNative->Core.offPc == 0);
    8061 
    8062     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
    8063 #endif
    8064 
    8065     /* Get a register with the new PC loaded from idxVarPc.
    8066        Note! This ASSUMES that the high bits of the GPR are zeroed. */
    8067     uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
    8068 
    8069     /* Check limit (may #GP(0) + exit TB). */
    8070     if (!f64Bit)
    8071 /** @todo we can skip this test in FLAT 32-bit mode. */
    8072         off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
    8073     /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
    8074     else if (cbVar > sizeof(uint32_t))
    8075         off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
    8076 
    8077     /* Store the result. */
    8078     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    8079 
    8080     iemNativeVarRegisterRelease(pReNative, idxVarPc);
    8081     /** @todo implicitly free the variable? */
    8082 
    8083     return off;
    8084 }
    8085 
    8086 
    8087 
    8088 /*********************************************************************************************************************************
    8089 *   Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX)                                                                     *
    8090 *********************************************************************************************************************************/
    8091 
    8092 #define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
    8093     off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
    8094 
    8095 /**
    8096  * Emits code to check if a \#NM exception should be raised.
    8097  *
    8098  * @returns New code buffer offset, UINT32_MAX on failure.
    8099  * @param   pReNative       The native recompile state.
    8100  * @param   off             The code buffer offset.
    8101  * @param   idxInstr        The current instruction.
    8102  */
    8103 DECL_INLINE_THROW(uint32_t)
    8104 iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
    8105 {
    8106     /*
    8107      * Make sure we don't have any outstanding guest register writes as we may
    8108      * raise an #NM and all guest registers must be up to date in CPUMCTX.
    8109      *
    8110      * @todo r=aeichner Can we postpone this to the RaiseNm path?
    8111      */
    8112     off = iemNativeRegFlushPendingWrites(pReNative, off);
    8113 
    8114 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    8115     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    8116 #else
    8117     RT_NOREF(idxInstr);
    8118 #endif
    8119 
    8120     /* Allocate a temporary CR0 register. */
    8121     uint8_t const idxCr0Reg       = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
    8122     uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
    8123 
    8124     /*
    8125      * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
    8126      *     return raisexcpt();
    8127      */
    8128     /* Test and jump. */
    8129     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
    8130 
    8131     /* Free but don't flush the CR0 register. */
    8132     iemNativeRegFreeTmp(pReNative, idxCr0Reg);
    8133 
    8134     return off;
    8135 }
    8136 
    8137 
    8138 #define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
    8139     off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
    8140 
    8141 /**
    8142  * Emits code to check if a \#MF exception should be raised.
    8143  *
    8144  * @returns New code buffer offset, UINT32_MAX on failure.
    8145  * @param   pReNative       The native recompile state.
    8146  * @param   off             The code buffer offset.
    8147  * @param   idxInstr        The current instruction.
    8148  */
    8149 DECL_INLINE_THROW(uint32_t)
    8150 iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
    8151 {
    8152     /*
    8153      * Make sure we don't have any outstanding guest register writes as we may
    8154      * raise an #MF and all guest registers must be up to date in CPUMCTX.
    8155      *
    8156      * @todo r=aeichner Can we postpone this to the RaiseMf path?
    8157      */
    8158     off = iemNativeRegFlushPendingWrites(pReNative, off);
    8159 
    8160 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    8161     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    8162 #else
    8163     RT_NOREF(idxInstr);
    8164 #endif
    8165 
    8166     /* Allocate a temporary FSW register. */
    8167     uint8_t const idxFpuFswReg    = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
    8168     uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
    8169 
    8170     /*
    8171      * if ((FSW & X86_FSW_ES) != 0)
    8172      *     return raisexcpt();
    8173      */
    8174     /* Test and jump. */
    8175     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
    8176 
    8177     /* Free but don't flush the FSW register. */
    8178     iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
    8179 
    8180     return off;
    8181 }
    8182 
    8183 
    8184 #define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
    8185     off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
    8186 
    8187 /**
    8188  * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
    8189  *
    8190  * @returns New code buffer offset, UINT32_MAX on failure.
    8191  * @param   pReNative       The native recompile state.
    8192  * @param   off             The code buffer offset.
    8193  * @param   idxInstr        The current instruction.
    8194  */
    8195 DECL_INLINE_THROW(uint32_t)
    8196 iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
    8197 {
    8198     /*
    8199      * Make sure we don't have any outstanding guest register writes as we may
    8200      * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
    8201      *
    8202      * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
    8203      */
    8204     off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
    8205 
    8206 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    8207     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    8208 #else
    8209     RT_NOREF(idxInstr);
    8210 #endif
    8211 
    8212     /* Allocate a temporary CR0 and CR4 register. */
    8213     uint8_t const idxCr0Reg       = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
    8214     uint8_t const idxCr4Reg       = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
    8215     uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
    8216     uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
    8217 
    8218     /** @todo r=aeichner Optimize this more later to have fewer compares and branches,
    8219      *                   (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() in IEMMc.h but check that it has some
    8220      *                   actual performance benefit first). */
    8221     /*
    8222      * if (cr0 & X86_CR0_EM)
    8223      *     return raisexcpt();
    8224      */
    8225     off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_EM_BIT, idxLabelRaiseUd);
    8226     /*
    8227      * if (!(cr4 & X86_CR4_OSFXSR))
    8228      *     return raisexcpt();
    8229      */
    8230     off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSFXSR_BIT, idxLabelRaiseUd);
    8231     /*
    8232      * if (cr0 & X86_CR0_TS)
    8233      *     return raisexcpt();
    8234      */
    8235     off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
    8236 
    8237     /* Free but don't flush the CR0 and CR4 register. */
    8238     iemNativeRegFreeTmp(pReNative, idxCr0Reg);
    8239     iemNativeRegFreeTmp(pReNative, idxCr4Reg);
    8240 
    8241     return off;
    8242 }
    8243 
    8244 
    8245 #define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
    8246     off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
    8247 
    8248 /**
    8249  * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
    8250  *
    8251  * @returns New code buffer offset, UINT32_MAX on failure.
    8252  * @param   pReNative       The native recompile state.
    8253  * @param   off             The code buffer offset.
    8254  * @param   idxInstr        The current instruction.
    8255  */
    8256 DECL_INLINE_THROW(uint32_t)
    8257 iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
    8258 {
    8259     /*
    8260      * Make sure we don't have any outstanding guest register writes as we may
    8261      * raise an \#UD or \#NM and all guest register must be up to date in CPUMCTX.
    8262      *
    8263      * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
    8264      */
    8265     off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
    8266 
    8267 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    8268     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    8269 #else
    8270     RT_NOREF(idxInstr);
    8271 #endif
    8272 
    8273     /* Allocate a temporary CR0, CR4 and XCR0 register. */
    8274     uint8_t const idxCr0Reg       = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
    8275     uint8_t const idxCr4Reg       = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
    8276     uint8_t const idxXcr0Reg      = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0, kIemNativeGstRegUse_ReadOnly);
    8277     uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
    8278     uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
    8279 
    8280     /** @todo r=aeichner Optimize this later to use fewer compares and branches
    8281      *                   (see IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() in IEMMc.h, but check that it has some
    8282      *                   actual performance benefit first). */
    8283     /*
    8284      * if ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE))
    8285      *     return raisexcpt();
    8286      */
    8287     const uint8_t idxRegTmp = iemNativeRegAllocTmpImm(pReNative, &off, XSAVE_C_YMM | XSAVE_C_SSE);
    8288     off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxXcr0Reg);
    8289     off = iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, idxRegTmp, XSAVE_C_YMM | XSAVE_C_SSE, idxLabelRaiseUd);
    8290     iemNativeRegFreeTmp(pReNative, idxRegTmp);
    8291 
    8292     /*
    8293      * if (!(cr4 & X86_CR4_OSXSAVE))
    8294      *     return raisexcpt();
    8295      */
    8296     off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT, idxLabelRaiseUd);
    8297     /*
    8298      * if (cr0 & X86_CR0_TS)
    8299      *     return raisexcpt();
    8300      */
    8301     off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
    8302 
    8303     /* Free but don't flush the CR0, CR4 and XCR0 register. */
    8304     iemNativeRegFreeTmp(pReNative, idxCr0Reg);
    8305     iemNativeRegFreeTmp(pReNative, idxCr4Reg);
    8306     iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
    8307 
    8308     return off;
    8309 }
    8310 
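
The SSE and AVX gates above encode the architectural checks: CR0.EM or a clear CR4.OSFXSR yields \#UD for SSE, an incomplete XCR0 or a clear CR4.OSXSAVE yields \#UD for AVX, and CR0.TS yields \#NM in both cases. A minimal standalone sketch of the same decision logic (plain C++ with hard-coded architectural bit values; the helper names are hypothetical and not part of the recompiler):

    #include <cassert>
    #include <cstdint>

    enum class SimdXcpt { None, RaiseUd, RaiseNm };

    /* Same tests as iemNativeEmitMaybeRaiseSseRelatedXcpt, evaluated directly. */
    static SimdXcpt sseXcptCheck(uint64_t cr0, uint64_t cr4)
    {
        if (cr0 & UINT64_C(0x4))            /* X86_CR0_EM (bit 2) */
            return SimdXcpt::RaiseUd;
        if (!(cr4 & UINT64_C(0x200)))       /* X86_CR4_OSFXSR (bit 9) */
            return SimdXcpt::RaiseUd;
        if (cr0 & UINT64_C(0x8))            /* X86_CR0_TS (bit 3) */
            return SimdXcpt::RaiseNm;
        return SimdXcpt::None;
    }

    /* Same tests as iemNativeEmitMaybeRaiseAvxRelatedXcpt. */
    static SimdXcpt avxXcptCheck(uint64_t cr0, uint64_t cr4, uint64_t xcr0)
    {
        uint64_t const fNeeded = UINT64_C(0x6);  /* XSAVE_C_YMM | XSAVE_C_SSE */
        if ((xcr0 & fNeeded) != fNeeded)
            return SimdXcpt::RaiseUd;
        if (!(cr4 & UINT64_C(0x40000)))          /* X86_CR4_OSXSAVE (bit 18) */
            return SimdXcpt::RaiseUd;
        if (cr0 & UINT64_C(0x8))                 /* X86_CR0_TS */
            return SimdXcpt::RaiseNm;
        return SimdXcpt::None;
    }

    int main()
    {
        assert(sseXcptCheck(0x4, 0x200)      == SimdXcpt::RaiseUd); /* CR0.EM set */
        assert(sseXcptCheck(0x8, 0x200)      == SimdXcpt::RaiseNm); /* CR0.TS set */
        assert(sseXcptCheck(0x0, 0x200)      == SimdXcpt::None);
        assert(avxXcptCheck(0x0, 0x40000, 6) == SimdXcpt::None);
        assert(avxXcptCheck(0x0, 0x40000, 2) == SimdXcpt::RaiseUd); /* no YMM in XCR0 */
        return 0;
    }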
    8311 
    8312 #define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
    8313     off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
    8314 
    8315 /**
    8316  * Emits code to raise a SIMD floating point exception (either \#UD or \#XF).
    8317  *
    8318  * @returns New code buffer offset, UINT32_MAX on failure.
    8319  * @param   pReNative       The native recompile state.
    8320  * @param   off             The code buffer offset.
    8321  * @param   idxInstr        The current instruction.
    8322  */
    8323 DECL_INLINE_THROW(uint32_t)
    8324 iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
    8325 {
    8326     /*
    8327      * Make sure we don't have any outstanding guest register writes as we may
    8328      * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
    8329      *
    8330      * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
    8331      */
    8332     off = iemNativeRegFlushPendingWrites(pReNative, off);
    8333 
    8334 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    8335     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    8336 #else
    8337     RT_NOREF(idxInstr);
    8338 #endif
    8339 
    8340     /* Allocate a temporary CR4 register. */
    8341     uint8_t const idxCr4Reg       = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
    8342     uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
    8343     uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
    8344 
    8345     /*
    8346      * if (!(cr4 & X86_CR4_OSXMMEEXCPT))
    8347      *     return raisexcpt();
    8348      */
    8349     off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseXf);
    8350 
    8351     /* raise \#UD exception unconditionally. */
    8352     off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseUd);
    8353 
    8354     /* Free but don't flush the CR4 register. */
    8355     iemNativeRegFreeTmp(pReNative, idxCr4Reg);
    8356 
    8357     return off;
    8358 }
    8359 
    8360 
    8361 
    8362 /*********************************************************************************************************************************
    8363 *   Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF)                                                         *
    8364 *********************************************************************************************************************************/
    8365 
    8366 /**
    8367  * Pushes an IEM_MC_IF_XXX onto the condition stack.
    8368  *
    8369  * @returns Pointer to the condition stack entry on success; throws
    8370  *          VERR_IEM_COND_TOO_DEEPLY_NESTED when nested too deeply.
    8371  */
    8372 DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
    8373 {
    8374 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    8375     *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
    8376 #endif
    8377 
    8378     uint32_t const idxStack = pReNative->cCondDepth;
    8379     AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
    8380 
    8381     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
    8382     pReNative->cCondDepth = (uint8_t)(idxStack + 1);
    8383 
    8384     uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
    8385     pEntry->fInElse       = false;
    8386     pEntry->idxLabelElse  = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
    8387     pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
    8388 
    8389     return pEntry;
    8390 }
    8391 
    8392 
    8393 /**
    8394  * Start of the if-block, snapshotting the register and variable state.
    8395  */
    8396 DECL_INLINE_THROW(void)
    8397 iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
    8398 {
    8399     Assert(offIfBlock != UINT32_MAX);
    8400     Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
    8401     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
    8402     Assert(!pEntry->fInElse);
    8403 
    8404     /* Define the start of the IF block if requested or for disassembly purposes. */
    8405     if (idxLabelIf != UINT32_MAX)
    8406         iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
    8407 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    8408     else
    8409         iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
    8410 #else
    8411     RT_NOREF(offIfBlock);
    8412 #endif
    8413 
    8414 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    8415     Assert(pReNative->Core.offPc == 0);
    8416 #endif
    8417 
    8418     /* Copy the initial state so we can restore it in the 'else' block. */
    8419     pEntry->InitialState = pReNative->Core;
    8420 }
    8421 
    8422 
    8423 #define IEM_MC_ELSE() } while (0); \
    8424         off = iemNativeEmitElse(pReNative, off); \
    8425         do {
    8426 
    8427 /** Emits code related to IEM_MC_ELSE. */
    8428 DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    8429 {
    8430     /* Check sanity and get the conditional stack entry. */
    8431     Assert(off != UINT32_MAX);
    8432     Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
    8433     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
    8434     Assert(!pEntry->fInElse);
    8435 
    8436     /* Jump to the endif */
    8437     off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
    8438 
    8439     /* Define the else label and enter the else part of the condition. */
    8440     iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
    8441     pEntry->fInElse = true;
    8442 
    8443 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    8444     Assert(pReNative->Core.offPc == 0);
    8445 #endif
    8446 
    8447     /* Snapshot the core state so we can do a merge at the endif and restore
    8448        the snapshot we took at the start of the if-block. */
    8449     pEntry->IfFinalState = pReNative->Core;
    8450     pReNative->Core = pEntry->InitialState;
    8451 
    8452     return off;
    8453 }
    8454 
    8455 
    8456 #define IEM_MC_ENDIF() } while (0); \
    8457         off = iemNativeEmitEndIf(pReNative, off)
    8458 
    8459 /** Emits code related to IEM_MC_ENDIF. */
    8460 DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    8461 {
    8462     /* Check sanity and get the conditional stack entry. */
    8463     Assert(off != UINT32_MAX);
    8464     Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
    8465     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
    8466 
    8467 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    8468     Assert(pReNative->Core.offPc == 0);
    8469 #endif
    8470 
    8471     /*
    8472      * Now we must find common ground between the current core state and the
    8473      * state at the end of the if-block.  Use the smallest common denominator
    8474      * and just drop anything that isn't the same in both states.
    8475      */
    8476     /** @todo We could, maybe, shuffle registers around if we thought it helpful,
    8477      *        which is why we're doing this at the end of the else-block.
    8478      *        But we'd need more info about the future for that to be worth the effort. */
    8479     PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
    8480     if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
    8481     {
    8482         /* Guest register shadows first. */
    8483         uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
    8484         if (fGstRegs)
    8485         {
    8486             Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
    8487             do
    8488             {
    8489                 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    8490                 fGstRegs &= ~RT_BIT_64(idxGstReg);
    8491 
    8492                 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    8493                 if (  !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
    8494                     || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
    8495                 {
    8496                     Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
    8497                            g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
    8498                     iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
    8499                 }
    8500             } while (fGstRegs);
    8501         }
    8502         else
    8503             Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
    8504 
    8505         /* Check variables next.  For now we require them to be identical
    8506            or something we can recreate. */
    8507         Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
    8508         uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
    8509         if (fVars)
    8510         {
    8511             uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
    8512             do
    8513             {
    8514                 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
    8515                 fVars &= ~RT_BIT_32(idxVar);
    8516 
    8517                 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
    8518                 {
    8519                     if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
    8520                         continue;
    8521                     if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
    8522                     {
    8523                         uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    8524                         if (idxHstReg != UINT8_MAX)
    8525                         {
    8526                             pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    8527                             pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    8528                             Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
    8529                                    g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
    8530                         }
    8531                         continue;
    8532                     }
    8533                 }
    8534                 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
    8535                     continue;
    8536 
    8537                 /* Irreconcilable, so drop it. */
    8538                 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    8539                 if (idxHstReg != UINT8_MAX)
    8540                 {
    8541                     pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    8542                     pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    8543                     Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
    8544                            g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
    8545                 }
    8546                 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
    8547                 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
    8548             } while (fVars);
    8549         }
    8550 
    8551         /* Finally, check that the host register allocations match. */
    8552         AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
    8553                       ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
    8554                        pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
    8555                       IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
    8556     }
    8557 
    8558     /*
    8559      * Define the endif label and maybe the else one if we're still in the 'if' part.
    8560      */
    8561     if (!pEntry->fInElse)
    8562         iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
    8563     else
    8564         Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
    8565     iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
    8566 
    8567     /* Pop the conditional stack. */
    8568     pReNative->cCondDepth -= 1;
    8569 
    8570     return off;
    8571 }
    8572 
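The endif reconciliation above is a "smallest common denominator" merge: the state at the end of the if-path and the state at the end of the else-path are intersected, and any guest shadow present on only one path (or living in different host registers) is dropped. A compilable toy model of that snapshot/restore/merge discipline (deliberately simplified state, not the real IEMNATIVECORESTATE):

    #include <cassert>
    #include <cstdint>

    struct CoreState { uint64_t bmGstRegShadows; };

    int main()
    {
        CoreState Core         = { UINT64_C(0x15) }; /* state before IEM_MC_IF_XXX */
        CoreState InitialState = Core;               /* snapshot at if-block start */

        Core.bmGstRegShadows |= UINT64_C(0x02);      /* if-block loads a shadow    */
        CoreState IfFinalState = Core;               /* snapshot at IEM_MC_ELSE    */

        Core = InitialState;                         /* else-block restarts clean  */
        Core.bmGstRegShadows |= UINT64_C(0x08);      /* ...and loads another one   */

        /* IEM_MC_ENDIF: keep only what both paths agree on. */
        Core.bmGstRegShadows &= IfFinalState.bmGstRegShadows;
        assert(Core.bmGstRegShadows == UINT64_C(0x15));
        return 0;
    }

(The real merge additionally requires each surviving shadow to sit in the same host register on both paths, and reconciles variables; the intersection idea is the same.)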
    8573 
    8574 #define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
    8575         off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
    8576         do {
    8577 
    8578 /** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
    8579 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
    8580 {
    8581     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
    8582 
    8583     /* Get the eflags. */
    8584     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    8585                                                               kIemNativeGstRegUse_ReadOnly);
    8586 
    8587     /* Test and jump. */
    8588     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
    8589 
    8590     /* Free but don't flush the EFlags register. */
    8591     iemNativeRegFreeTmp(pReNative, idxEflReg);
    8592 
    8593     /* Make a copy of the core state now as we start the if-block. */
    8594     iemNativeCondStartIfBlock(pReNative, off);
    8595 
    8596     return off;
    8597 }
    8598 
    8599 
    8600 #define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
    8601         off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
    8602         do {
    8603 
    8604 /** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
    8605 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
    8606 {
    8607     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
    8608 
    8609     /* Get the eflags. */
    8610     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    8611                                                               kIemNativeGstRegUse_ReadOnly);
    8612 
    8613     /* Test and jump. */
    8614     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
    8615 
    8616     /* Free but don't flush the EFlags register. */
    8617     iemNativeRegFreeTmp(pReNative, idxEflReg);
    8618 
    8619     /* Make a copy of the core state now as we start the if-block. */
    8620     iemNativeCondStartIfBlock(pReNative, off);
    8621 
    8622     return off;
    8623 }
    8624 
    8625 
    8626 #define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
    8627         off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
    8628         do {
    8629 
    8630 /** Emits code for IEM_MC_IF_EFL_BIT_SET. */
    8631 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
    8632 {
    8633     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
    8634 
    8635     /* Get the eflags. */
    8636     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    8637                                                               kIemNativeGstRegUse_ReadOnly);
    8638 
    8639     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    8640     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    8641 
    8642     /* Test and jump. */
    8643     off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
    8644 
    8645     /* Free but don't flush the EFlags register. */
    8646     iemNativeRegFreeTmp(pReNative, idxEflReg);
    8647 
    8648     /* Make a copy of the core state now as we start the if-block. */
    8649     iemNativeCondStartIfBlock(pReNative, off);
    8650 
    8651     return off;
    8652 }
    8653 
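The mask-to-bit-number conversion used throughout these emitters relies on the mask being a single flag bit: ASMBitFirstSetU32 returns the 1-based index of the lowest set bit, so subtracting one yields the bit number, and the assertion verifies the single-bit precondition. A quick standalone check (using the GCC/Clang builtin __builtin_ctz in place of ASMBitFirstSetU32):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        uint32_t const fBitInEfl = UINT32_C(0x800);           /* X86_EFL_OF */
        unsigned const iBitNo    = __builtin_ctz(fBitInEfl);  /* == ASMBitFirstSetU32(fBitInEfl) - 1 */
        assert((UINT32_C(1) << iBitNo) == fBitInEfl);         /* the single-bit assertion above */
        assert(iBitNo == 11);
        return 0;
    }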
    8654 
    8655 #define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
    8656         off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
    8657         do {
    8658 
    8659 /** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
    8660 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
    8661 {
    8662     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
    8663 
    8664     /* Get the eflags. */
    8665     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    8666                                                               kIemNativeGstRegUse_ReadOnly);
    8667 
    8668     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    8669     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    8670 
    8671     /* Test and jump. */
    8672     off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
    8673 
    8674     /* Free but don't flush the EFlags register. */
    8675     iemNativeRegFreeTmp(pReNative, idxEflReg);
    8676 
    8677     /* Make a copy of the core state now as we start the if-block. */
    8678     iemNativeCondStartIfBlock(pReNative, off);
    8679 
    8680     return off;
    8681 }
    8682 
    8683 
    8684 #define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2)         \
    8685     off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
    8686     do {
    8687 
    8688 #define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2)         \
    8689     off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
    8690     do {
    8691 
    8692 /** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
    8693 DECL_INLINE_THROW(uint32_t)
    8694 iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    8695                                   uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
    8696 {
    8697     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
    8698 
    8699     /* Get the eflags. */
    8700     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    8701                                                               kIemNativeGstRegUse_ReadOnly);
    8702 
    8703     unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
    8704     Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
    8705 
    8706     unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
    8707     Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
    8708     Assert(iBitNo1 != iBitNo2);
    8709 
    8710 #ifdef RT_ARCH_AMD64
    8711     uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
    8712 
    8713     off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    8714     if (iBitNo1 > iBitNo2)
    8715         off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
    8716     else
    8717         off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
    8718     off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    8719 
    8720 #elif defined(RT_ARCH_ARM64)
    8721     uint8_t const    idxTmpReg   = iemNativeRegAllocTmp(pReNative, &off);
    8722     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    8723 
    8724     /* and tmpreg, eflreg, #1<<iBitNo1 */
    8725     pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
    8726 
    8727     /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
    8728     if (iBitNo1 > iBitNo2)
    8729         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    8730                                                 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
    8731     else
    8732         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    8733                                                 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
    8734 
    8735     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    8736 
    8737 #else
    8738 # error "Port me"
    8739 #endif
    8740 
    8741     /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
    8742     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
    8743                                                      pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
    8744 
    8745     /* Free but don't flush the EFlags and tmp registers. */
    8746     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    8747     iemNativeRegFreeTmp(pReNative, idxEflReg);
    8748 
    8749     /* Make a copy of the core state now as we start the if-block. */
    8750     iemNativeCondStartIfBlock(pReNative, off);
    8751 
    8752     return off;
    8753 }
    8754 
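The AND/shift/XOR sequence above compares two flag bits without branching: bit #1 is isolated, shifted onto bit #2's position, and XORed back into the flags, leaving bit #2 of the temporary set exactly when the two bits differ. A standalone C++ rendering of the trick (the value semantics only, not the emitted instructions):

    #include <cassert>
    #include <cstdint>

    static bool eflBitsDiffer(uint32_t fEfl, unsigned iBitNo1, unsigned iBitNo2)
    {
        uint32_t uTmp = fEfl & (UINT32_C(1) << iBitNo1);          /* and tmp, efl, 1<<bit1 */
        uTmp = iBitNo1 > iBitNo2 ? uTmp >> (iBitNo1 - iBitNo2)    /* shift bit1 onto bit2  */
                                 : uTmp << (iBitNo2 - iBitNo1);
        uTmp ^= fEfl;                                             /* xor tmp, efl          */
        return (uTmp & (UINT32_C(1) << iBitNo2)) != 0;            /* bit2 set <=> differ   */
    }

    int main()
    {
        /* SF is bit 7 and OF bit 11, a pair typically compared for signed Jcc. */
        assert(!eflBitsDiffer(0x000, 7, 11));   /* both clear -> equal  */
        assert(!eflBitsDiffer(0x880, 7, 11));   /* both set   -> equal  */
        assert( eflBitsDiffer(0x080, 7, 11));   /* SF only    -> differ */
        assert( eflBitsDiffer(0x800, 7, 11));   /* OF only    -> differ */
        return 0;
    }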
    8755 
    8756 #define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
    8757     off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
    8758     do {
    8759 
    8760 #define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
    8761     off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
    8762     do {
    8763 
    8764 /** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
    8765  *  IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
    8766 DECL_INLINE_THROW(uint32_t)
    8767 iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
    8768                                               uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
    8769 {
    8770     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
    8771 
    8772     /* We need an if-block label for the inverted variant. */
    8773     uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
    8774                                                                  pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
    8775 
    8776     /* Get the eflags. */
    8777     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    8778                                                               kIemNativeGstRegUse_ReadOnly);
    8779 
    8780     /* Translate the flag masks to bit numbers. */
    8781     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    8782     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    8783 
    8784     unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
    8785     Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
    8786     Assert(iBitNo1 != iBitNo);
    8787 
    8788     unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
    8789     Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
    8790     Assert(iBitNo2 != iBitNo);
    8791     Assert(iBitNo2 != iBitNo1);
    8792 
    8793 #ifdef RT_ARCH_AMD64
    8794     uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
    8795 #elif defined(RT_ARCH_ARM64)
    8796     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    8797 #endif
    8798 
    8799     /* Check for the lone bit first. */
    8800     if (!fInverted)
    8801         off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
    8802     else
    8803         off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
    8804 
    8805     /* Then extract and compare the other two bits. */
    8806 #ifdef RT_ARCH_AMD64
    8807     off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    8808     if (iBitNo1 > iBitNo2)
    8809         off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
    8810     else
    8811         off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
    8812     off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    8813 
    8814 #elif defined(RT_ARCH_ARM64)
    8815     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    8816 
    8817     /* and tmpreg, eflreg, #1<<iBitNo1 */
    8818     pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
    8819 
    8820     /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
    8821     if (iBitNo1 > iBitNo2)
    8822         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    8823                                                 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
    8824     else
    8825         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    8826                                                 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
    8827 
    8828     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    8829 
    8830 #else
    8831 # error "Port me"
    8832 #endif
    8833 
    8834     /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
    8835     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
    8836                                                      pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
    8837 
    8838     /* Free but don't flush the EFlags and tmp registers. */
    8839     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    8840     iemNativeRegFreeTmp(pReNative, idxEflReg);
    8841 
    8842     /* Make a copy of the core state now as we start the if-block. */
    8843     iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
    8844 
    8845     return off;
    8846 }
    8847 
    8848 
    8849 #define IEM_MC_IF_CX_IS_NZ() \
    8850     off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
    8851     do {
    8852 
    8853 /** Emits code for IEM_MC_IF_CX_IS_NZ. */
    8854 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    8855 {
    8856     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
    8857 
    8858     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    8859                                                                  kIemNativeGstRegUse_ReadOnly);
    8860     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
    8861     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    8862 
    8863     iemNativeCondStartIfBlock(pReNative, off);
    8864     return off;
    8865 }
    8866 
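Note that IEM_MC_IF_CX_IS_NZ only examines the low 16 bits of RCX, which is why the emitter tests against a UINT16_MAX mask rather than the whole register. A two-assert standalone illustration of the masking:

    #include <cassert>
    #include <cstdint>

    int main()
    {
        uint64_t const uRcx = UINT64_C(0xdead0000);   /* CX == 0, upper bits set */
        assert((uRcx & UINT16_MAX) == 0);             /* -> else-branch is taken */
        assert(((uRcx | 1) & UINT16_MAX) != 0);       /* CX != 0 -> if-block     */
        return 0;
    }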
    8867 
    8868 #define IEM_MC_IF_ECX_IS_NZ() \
    8869     off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
    8870     do {
    8871 
    8872 #define IEM_MC_IF_RCX_IS_NZ() \
    8873     off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
    8874     do {
    8875 
    8876 /** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
    8877 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
    8878 {
    8879     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
    8880 
    8881     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    8882                                                                  kIemNativeGstRegUse_ReadOnly);
    8883     off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
    8884     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    8885 
    8886     iemNativeCondStartIfBlock(pReNative, off);
    8887     return off;
    8888 }
    8889 
    8890 
    8891 #define IEM_MC_IF_CX_IS_NOT_ONE() \
    8892     off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
    8893     do {
    8894 
    8895 /** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
    8896 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    8897 {
    8898     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
    8899 
    8900     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    8901                                                                  kIemNativeGstRegUse_ReadOnly);
    8902 #ifdef RT_ARCH_AMD64
    8903     off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
    8904 #else
    8905     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    8906     off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
    8907     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    8908 #endif
    8909     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    8910 
    8911     iemNativeCondStartIfBlock(pReNative, off);
    8912     return off;
    8913 }
    8914 
    8915 
    8916 #define IEM_MC_IF_ECX_IS_NOT_ONE() \
    8917     off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
    8918     do {
    8919 
    8920 #define IEM_MC_IF_RCX_IS_NOT_ONE() \
    8921     off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
    8922     do {
    8923 
    8924 /** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
    8925 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
    8926 {
    8927     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
    8928 
    8929     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    8930                                                                  kIemNativeGstRegUse_ReadOnly);
    8931     if (f64Bit)
    8932         off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
    8933     else
    8934         off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
    8935     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    8936 
    8937     iemNativeCondStartIfBlock(pReNative, off);
    8938     return off;
    8939 }
    8940 
    8941 
    8942 #define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
    8943     off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
    8944     do {
    8945 
    8946 #define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
    8947     off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
    8948     do {
    8949 
    8950 /** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
    8951  *  IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
    8952 DECL_INLINE_THROW(uint32_t)
    8953 iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
    8954 {
    8955     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
    8956 
    8957     /* We have to load both RCX and EFLAGS before we can start branching,
    8958        otherwise we'll end up in the else-block with an inconsistent
    8959        register allocator state.
    8960        Doing EFLAGS first as it's more likely to be loaded, right? */
    8961     uint8_t const idxEflReg    = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    8962                                                                  kIemNativeGstRegUse_ReadOnly);
    8963     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    8964                                                                  kIemNativeGstRegUse_ReadOnly);
    8965 
    8966     /** @todo we could reduce this to a single branch instruction by spending a
    8967      *        temporary register and some setnz stuff.  Not sure if loops are
    8968      *        worth it. */
    8969     /* Check CX. */
    8970 #ifdef RT_ARCH_AMD64
    8971     off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
    8972 #else
    8973     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    8974     off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
    8975     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    8976 #endif
    8977 
    8978     /* Check the EFlags bit. */
    8979     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    8980     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    8981     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
    8982                                                      !fCheckIfSet /*fJmpIfSet*/);
    8983 
    8984     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    8985     iemNativeRegFreeTmp(pReNative, idxEflReg);
    8986 
    8987     iemNativeCondStartIfBlock(pReNative, off);
    8988     return off;
    8989 }
    8990 
    8991 
    8992 #define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
    8993     off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
    8994     do {
    8995 
    8996 #define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
    8997     off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
    8998     do {
    8999 
    9000 #define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
    9001     off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
    9002     do {
    9003 
    9004 #define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
    9005     off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
    9006     do {
    9007 
    9008 /** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
    9009  *  IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
    9010  *  IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
    9011  *  IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
    9012 DECL_INLINE_THROW(uint32_t)
    9013 iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    9014                                                uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
    9015 {
    9016     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
    9017 
    9018     /* We have to load both RCX and EFLAGS before we can start branching,
    9019        otherwise we'll end up in the else-block with an inconsistent
    9020        register allocator state.
    9021        Doing EFLAGS first as it's more likely to be loaded, right? */
    9022     uint8_t const idxEflReg    = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    9023                                                                  kIemNativeGstRegUse_ReadOnly);
    9024     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    9025                                                                  kIemNativeGstRegUse_ReadOnly);
    9026 
    9027     /** @todo we could reduce this to a single branch instruction by spending a
    9028      *        temporary register and some setnz stuff.  Not sure if loops are
    9029      *        worth it. */
    9030     /* Check RCX/ECX. */
    9031     if (f64Bit)
    9032         off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
    9033     else
    9034         off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
    9035 
    9036     /* Check the EFlags bit. */
    9037     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    9038     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    9039     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
    9040                                                      !fCheckIfSet /*fJmpIfSet*/);
    9041 
    9042     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    9043     iemNativeRegFreeTmp(pReNative, idxEflReg);
    9044 
    9045     iemNativeCondStartIfBlock(pReNative, off);
    9046     return off;
    9047 }
    9048 
    9049 
    9050 
    90516936/*********************************************************************************************************************************
    90526937*   Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++                                                            *
    90536938*********************************************************************************************************************************/
    9054 /** Number of hidden arguments for CIMPL calls.
    9055  * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
    9056 #if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
    9057 # define IEM_CIMPL_HIDDEN_ARGS 3
    9058 #else
    9059 # define IEM_CIMPL_HIDDEN_ARGS 2
    9060 #endif
    9061 
    9062 #define IEM_MC_NOREF(a_Name) \
    9063     RT_NOREF_PV(a_Name)
    9064 
    9065 #define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
    9066     uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
    9067 
    9068 #define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
    9069     uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
    9070 
    9071 #define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
    9072     uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
    9073 
    9074 #define IEM_MC_LOCAL(a_Type, a_Name) \
    9075     uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
    9076 
    9077 #define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
    9078     uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
    9079 
    9080 
    9081 /**
    9082  * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
    9083  */
    9084 DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
    9085 {
    9086     if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
    9087         return IEM_CIMPL_HIDDEN_ARGS;
    9088     if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
    9089         return 1;
    9090     return 0;
    9091 }
    9092 
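The hidden-argument count matters because every visible IEM_MC_ARG index is shifted by it when the call is laid out, and on Windows/AMD64 strict builds the VBOXSTRICTRC return value travels through an extra hidden by-reference argument. A hypothetical stand-in (not VBox code) showing why that bumps the count from 2 to 3:

    #include <cassert>
    #include <cstdint>

    struct VCpu { int iDummy; };

    /* Default shape: the status code comes back in the return register. */
    static int cimplWorker(VCpu *pVCpu, uint8_t cbInstr, uint64_t uArg0)   /* 2 hidden args */
    {
        (void)pVCpu; (void)cbInstr;
        return uArg0 != 0 ? 0 : 1;
    }

    /* Windows/AMD64 strict shape: the status goes via a hidden first argument. */
    static void cimplWorkerShadowRet(int *prcStrict, VCpu *pVCpu,
                                     uint8_t cbInstr, uint64_t uArg0)      /* 3 hidden args */
    {
        *prcStrict = cimplWorker(pVCpu, cbInstr, uArg0);
    }

    int main()
    {
        VCpu Cpu = { 0 };
        int  rc  = -1;
        cimplWorkerShadowRet(&rc, &Cpu, 2 /*cbInstr*/, 7 /*a0*/);
        assert(rc == cimplWorker(&Cpu, 2, 7));
        return 0;
    }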
    90936939
    90946940/**
     
    92227068 * @throws  VERR_IEM_VAR_IPE_2
    92237069 */
    9224 static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
     7070DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    92257071{
    92267072    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
     
    92527098 * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
    92537099 */
    9254 static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
     7100DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
    92557101{
    92567102    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
     
    93207166 * @throws  VERR_IEM_VAR_IPE_2
    93217167 */
    9322 static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
    9323                                            IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
     7168DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
     7169                                                       IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
    93247170{
    93257171    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
     
    96557501
    96567502/**
    9657  * Sets the host register for @a idxVar to @a idxReg.
    9658  *
    9659  * The register must not be allocated.  Any guest register shadowing will be
    9660  * implicitly dropped by this call.
    9661  *
    9662  * The variable must not have any register associated with it (causes
    9663  * VERR_IEM_VAR_IPE_10 to be raised).  Conversion to a stack variable is
    9664  * implied.
    9665  *
    9666  * @returns idxReg
    9667  * @param   pReNative   The recompiler state.
    9668  * @param   idxVar      The variable.
    9669  * @param   idxReg      The host register (typically IEMNATIVE_CALL_RET_GREG).
    9670  * @param   off         For recording in debug info.
    9671  *
    9672  * @throws  VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
    9673  */
    9674 DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
    9675 {
    9676     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    9677     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    9678     Assert(!pVar->fRegAcquired);
    9679     Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    9680     AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
    9681     AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
    9682 
    9683     iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
    9684     iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
    9685 
    9686     iemNativeVarSetKindToStack(pReNative, idxVar);
    9687     pVar->idxReg = idxReg;
    9688 
    9689     return idxReg;
    9690 }
    9691 
    9692 
    9693 /**
    9694  * Variant of iemNativeVarRegisterSet that also marks the register as acquired.
    9695  */
    9696 DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
    9697                                                              uint8_t idxReg, uint32_t *poff)
    9698 {
    9699     idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
    9700     pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
    9701     return idxReg;
    9702 }
    9703 
    9704 
    9705 /**
    97067503 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
    97077504 *
     
    98857682 * ASSUMES that @a idxVar is valid and unpacked.
    98867683 */
    9887 DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
     7684DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    98887685{
    98897686    Assert(   pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid  /* Including invalid as we may have unused */
     
    99177714 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
    99187715 */
    9919 DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
     7716DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
    99207717{
    99217718    while (bmVars != 0)
     
    99427739    pReNative->Core.u64ArgVars = UINT64_MAX;
    99437740#endif
    9944 }
    9945 
    9946 
    9947 /**
    9948  * This is called by IEM_MC_END() to clean up all variables.
    9949  */
    9950 DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
    9951 {
    9952     uint32_t const bmVars = pReNative->Core.bmVars;
    9953     if (bmVars != 0)
    9954         iemNativeVarFreeAllSlow(pReNative, bmVars);
    9955     Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
    9956     Assert(pReNative->Core.bmStack    == 0);
    9957 }
    9958 
    9959 
    9960 #define IEM_MC_FREE_LOCAL(a_Name)   iemNativeVarFreeLocal(pReNative, a_Name)
    9961 
    9962 /**
    9963  * This is called by IEM_MC_FREE_LOCAL.
    9964  */
    9965 DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    9966 {
    9967     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    9968     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
    9969     iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
    9970 }
    9971 
    9972 
    9973 #define IEM_MC_FREE_ARG(a_Name)     iemNativeVarFreeArg(pReNative, a_Name)
    9974 
    9975 /**
    9976  * This is called by IEM_MC_FREE_ARG.
    9977  */
    9978 DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    9979 {
    9980     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    9981     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
    9982     iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
    9983 }
    9984 
    9985 
    9986 #define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
    9987 
    9988 /**
    9989  * This is called by IEM_MC_ASSIGN_TO_SMALLER.
    9990  */
    9991 DECL_INLINE_THROW(uint32_t)
    9992 iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
    9993 {
    9994     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
    9995     PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
    9996     AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    9997     Assert(   pVarDst->cbVar == sizeof(uint16_t)
    9998            || pVarDst->cbVar == sizeof(uint32_t));
    9999 
    10000     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
    10001     PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
    10002     AssertStmt(   pVarSrc->enmKind == kIemNativeVarKind_Stack
    10003                || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
    10004                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    10005 
    10006     Assert(pVarDst->cbVar < pVarSrc->cbVar);
    10007 
    10008     /*
    10009      * Special case for immediates.
    10010      */
    10011     if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
    10012     {
    10013         switch (pVarDst->cbVar)
    10014         {
    10015             case sizeof(uint16_t):
    10016                 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
    10017                 break;
    10018             case sizeof(uint32_t):
    10019                 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
    10020                 break;
    10021             default: AssertFailed(); break;
    10022         }
    10023     }
    10024     else
    10025     {
    10026         /*
    10027          * The generic solution for now.
    10028          */
    10029         /** @todo optimize this by having the python script make sure the source
    10030          *        variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
    10031          *        statement.   Then we could just transfer the register assignments. */
    10032         uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
    10033         uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
    10034         switch (pVarDst->cbVar)
    10035         {
    10036             case sizeof(uint16_t):
    10037                 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
    10038                 break;
    10039             case sizeof(uint32_t):
    10040                 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
    10041                 break;
    10042             default: AssertFailed(); break;
    10043         }
    10044         iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    10045         iemNativeVarRegisterRelease(pReNative, idxVarDst);
    10046     }
    10047     return off;
    100487741}
    100497742
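For the immediate case, IEM_MC_ASSIGN_TO_SMALLER degenerates to a recompile-time truncation; for the register case it becomes a narrowing, zero-extending register copy. A trivial worked example of the value semantics (plain C++, not the emitted code):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        uint64_t const uSrc   = UINT64_C(0x123456789abc);
        uint16_t const u16Dst = (uint16_t)uSrc;   /* sizeof(uint16_t) destination */
        uint32_t const u32Dst = (uint32_t)uSrc;   /* sizeof(uint32_t) destination */
        assert(u16Dst == 0x9abc);
        assert(u32Dst == UINT32_C(0x56789abc));
        return 0;
    }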
     
    104818174
    104828175
    10483 /** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
    10484 DECL_HIDDEN_THROW(uint32_t)
    10485 iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
    10486                              uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
    10487 
    10488 {
    10489     /*
    10490      * Do all the call setup and cleanup.
    10491      */
    10492     off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
    10493 
    10494     /*
    10495      * Load the two or three hidden arguments.
    10496      */
    10497 #if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
    10498     off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
    10499     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    10500     off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
    10501 #else
    10502     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    10503     off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
    10504 #endif
    10505 
    10506     /*
    10507      * Make the call and check the return code.
    10508      *
    10509      * Shadow PC copies are always flushed here, other stuff depends on flags.
    10510      * Segment and general purpose registers are explictily flushed via the
    10511      * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
    10512      * macros.
    10513      */
    10514     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
    10515 #if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
    10516     off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
    10517 #endif
    10518     fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
    10519     if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls.  */
    10520         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
    10521     iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
    10522 
    10523     return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
    10524 }
    10525 
    10526 
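After the call returns, the shadow-flush mask is widened before iemNativeRegFlushGuestShadows runs: the PC shadow is always added, and EFLAGS is added unless the block was compiled as a without-flags variant. A small sketch of that mask composition (hypothetical bit assignments, not the real kIemNativeGstReg_* values):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        uint64_t const fGstShwPc     = UINT64_C(1) << 0;   /* hypothetical bit */
        uint64_t const fGstShwEFlags = UINT64_C(1) << 1;   /* hypothetical bit */
        bool const     fWithoutFlags = false;              /* IEM_MC_F_WITHOUT_FLAGS clear */

        uint64_t fFlush = 0;                 /* caller-supplied a_fGstShwFlush  */
        fFlush |= fGstShwPc;                 /* PC shadow copies always flushed */
        if (!fWithoutFlags)
            fFlush |= fGstShwEFlags;         /* EFLAGS too in the normal variant */
        assert(fFlush == (fGstShwPc | fGstShwEFlags));
        return 0;
    }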
    10527 #define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
    10528     off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
    10529 
    10530 /** Emits code for IEM_MC_CALL_CIMPL_1. */
    10531 DECL_INLINE_THROW(uint32_t)
    10532 iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    10533                         uintptr_t pfnCImpl, uint8_t idxArg0)
    10534 {
    10535     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    10536     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
    10537 }
    10538 
    10539 
    10540 #define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
    10541     off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
    10542 
    10543 /** Emits code for IEM_MC_CALL_CIMPL_2. */
    10544 DECL_INLINE_THROW(uint32_t)
    10545 iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    10546                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
    10547 {
    10548     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    10549     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    10550     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
    10551 }
    10552 
    10553 
    10554 #define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
    10555     off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
    10556                                   (uintptr_t)a_pfnCImpl, a0, a1, a2)
    10557 
    10558 /** Emits code for IEM_MC_CALL_CIMPL_3. */
    10559 DECL_INLINE_THROW(uint32_t)
    10560 iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    10561                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
    10562 {
    10563     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    10564     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    10565     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
    10566     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
    10567 }
    10568 
    10569 
    10570 #define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
    10571     off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
    10572                                   (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
    10573 
    10574 /** Emits code for IEM_MC_CALL_CIMPL_4. */
    10575 DECL_INLINE_THROW(uint32_t)
    10576 iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    10577                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
    10578 {
    10579     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    10580     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    10581     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
    10582     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
    10583     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
    10584 }
    10585 
    10586 
    10587 #define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
    10588     off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
    10589                                   (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
    10590 
    10591 /** Emits code for IEM_MC_CALL_CIMPL_5. */
    10592 DECL_INLINE_THROW(uint32_t)
    10593 iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    10594                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
    10595 {
    10596     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    10597     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    10598     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
    10599     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
    10600     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
    10601     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
    10602 }
    10603 
    10604 
    10605 /** Recompiler debugging: Flush guest register shadow copies. */
    10606 #define IEM_MC_HINT_FLUSH_GUEST_SHADOW(a_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, a_fGstShwFlush)
    10607 
    10608 
    10609 
    10610 /*********************************************************************************************************************************
    10611 *   Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX                                                            *
    10612 *********************************************************************************************************************************/
    10613 
    10614 /**
    10615  * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
    10616  */
    10617 DECL_INLINE_THROW(uint32_t)
    10618 iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    10619                              uintptr_t pfnAImpl, uint8_t cArgs)
    10620 {
    10621     if (idxVarRc != UINT8_MAX)
    10622     {
    10623         IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
    10624         PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
    10625         AssertStmt(pVarRc->uArgNo == UINT8_MAX,       IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
    10626         AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
    10627     }
    10628 
    10629     /*
    10630      * Do all the call setup and cleanup.
    10631      */
    10632     off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
    10633 
    10634     /*
    10635      * Make the call and update the return code variable if we've got one.
    10636      */
    10637     off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
    10638     if (idxVarRc != UINT8_MAX)
    10639     {
    10640         off = iemNativeEmitBrk(pReNative, off, 0x4222); /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
    10641         iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
    10642     }
    10643 
    10644     return off;
    10645 }
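          /* Illustration (not part of the emitted code): assembly-helper calls
             have no hidden arguments, so e.g. IEM_MC_CALL_AIMPL_2 boils down to
                 u64Rc = pfnAImpl(a0, a1);
             with the return value, when requested, taken straight from
             IEMNATIVE_CALL_RET_GREG by the iemNativeVarRegisterSet() call above. */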
    10646 
    10647 
    10648 
    10649 #define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
    10650     off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
    10651 
    10652 #define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
    10653     off = iemNativeEmitCallAImpl0(pReNative, off, a_rc,                   (uintptr_t)(a_pfn))
    10654 
    10655 /** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
    10656 DECL_INLINE_THROW(uint32_t)
    10657 iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
    10658 {
    10659     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
    10660 }
    10661 
    10662 
    10663 #define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
    10664     off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
    10665 
    10666 #define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
    10667     off = iemNativeEmitCallAImpl1(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0)
    10668 
    10669 /** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
    10670 DECL_INLINE_THROW(uint32_t)
    10671 iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
    10672 {
    10673     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    10674     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
    10675 }
    10676 
    10677 
    10678 #define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
    10679     off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
    10680 
    10681 #define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
    10682     off = iemNativeEmitCallAImpl2(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0, a1)
    10683 
    10684 /** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
    10685 DECL_INLINE_THROW(uint32_t)
    10686 iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    10687                         uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
    10688 {
    10689     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    10690     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
    10691     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
    10692 }
    10693 
    10694 
    10695 #define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
    10696     off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
    10697 
    10698 #define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
    10699     off = iemNativeEmitCallAImpl3(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0, a1, a2)
    10700 
    10701 /** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
    10702 DECL_INLINE_THROW(uint32_t)
    10703 iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    10704                         uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
    10705 {
    10706     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    10707     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
    10708     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
    10709     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
    10710 }
    10711 
    10712 
    10713 #define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
    10714     off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
    10715 
    10716 #define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
    10717     off = iemNativeEmitCallAImpl4(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0, a1, a2, a3)
    10718 
    10719 /** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
    10720 DECL_INLINE_THROW(uint32_t)
    10721 iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    10722                         uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
    10723 {
    10724     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    10725     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
    10726     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
    10727     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
    10728     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
    10729 }
    10730 
    10731 
    10732 
    10733 /*********************************************************************************************************************************
    10734 *   Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX).                                                       *
    10735 *********************************************************************************************************************************/
    10736 
    10737 #define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
    10738     off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst,  a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
    10739 
    10740 #define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
    10741     off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
    10742 
    10743 #define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
    10744     off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
    10745 
    10746 #define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
    10747     off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
    10748 
    10749 
    10750 /** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
    10751  *  IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
    10752 DECL_INLINE_THROW(uint32_t)
    10753 iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbZeroExtended)
    10754 {
    10755     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    10756     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
    10757     Assert(iGRegEx < 20);
    10758 
    10759     /* Same discussion as in iemNativeEmitFetchGregU16 */
    10760     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    10761                                                                   kIemNativeGstRegUse_ReadOnly);
    10762 
    10763     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    10764     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    10765 
    10766     /* The value is zero-extended to the full 64-bit host register width. */
    10767     if (iGRegEx < 16)
    10768         off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    10769     else
    10770         off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
    10771 
    10772     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    10773     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    10774     return off;
    10775 }
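          /* Note on the extended register index used by the threaded functions
             (hence the iGRegEx < 20 assertion): values 0..15 select the low byte
             of the corresponding GPR, while 16..19 select the high byte of the
             first four (AH, CH, DH, BH), which is why the else branch above
             fetches via iemNativeEmitLoadGprFromGpr8Hi(). */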
    10776 
    10777 
    10778 #define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
    10779     off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
    10780 
    10781 #define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
    10782     off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
    10783 
    10784 #define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
    10785     off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
    10786 
    10787 /** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
    10788 DECL_INLINE_THROW(uint32_t)
    10789 iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
    10790 {
    10791     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    10792     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
    10793     Assert(iGRegEx < 20);
    10794 
    10795     /* Same discussion as in iemNativeEmitFetchGregU16 */
    10796     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    10797                                                                   kIemNativeGstRegUse_ReadOnly);
    10798 
    10799     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    10800     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    10801 
    10802     if (iGRegEx < 16)
    10803     {
    10804         switch (cbSignExtended)
    10805         {
    10806             case sizeof(uint16_t):
    10807                 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    10808                 break;
    10809             case sizeof(uint32_t):
    10810                 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    10811                 break;
    10812             case sizeof(uint64_t):
    10813                 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    10814                 break;
    10815             default: AssertFailed(); break;
    10816         }
    10817     }
    10818     else
    10819     {
    10820         off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
    10821         switch (cbSignExtended)
    10822         {
    10823             case sizeof(uint16_t):
    10824                 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
    10825                 break;
    10826             case sizeof(uint32_t):
    10827                 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
    10828                 break;
    10829             case sizeof(uint64_t):
    10830                 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
    10831                 break;
    10832             default: AssertFailed(); break;
    10833         }
    10834     }
    10835 
    10836     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    10837     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    10838     return off;
    10839 }
    10840 
    10841 
    10842 
    10843 #define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
    10844     off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
    10845 
    10846 #define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u32Dst, a_iGReg) \
    10847     off = iemNativeEmitFetchGregU16(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
    10848 
    10849 #define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u64Dst, a_iGReg) \
    10850     off = iemNativeEmitFetchGregU16(pReNative, off, a_u64Dst, a_iGReg, sizeof(uint64_t))
    10851 
    10852 /** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
    10853 DECL_INLINE_THROW(uint32_t)
    10854 iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
    10855 {
    10856     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    10857     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
    10858     Assert(iGReg < 16);
    10859 
    10860     /*
    10861      * We can either just load the low 16 bits of the GPR into a host register
    10862      * for the variable, or we can do so via a shadow copy host register. The
    10863      * latter will avoid having to reload it if it's being stored later, but
    10864      * will waste a host register if it isn't touched again.  Since we don't
    10865      * know what's going to happen, we choose the latter for now.
    10866      */
    10867     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    10868                                                                   kIemNativeGstRegUse_ReadOnly);
    10869 
    10870     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    10871     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    10872     off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
    10873     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    10874 
    10875     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    10876     return off;
    10877 }
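          /* Illustration (a sketch, not the authoritative encoding): with the
             guest GPR shadowed in host rcx, the fetch above presumably emits a
             single zero-extending move on AMD64, e.g.
                 movzx r10, cx
             so the shadow-copy choice discussed above costs no extra code, only
             the host register that keeps the full guest value around. */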
    10878 
    10879 
    10880 #define IEM_MC_FETCH_GREG_U16_SX_U32(a_u32Dst, a_iGReg) \
    10881     off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
    10882 
    10883 #define IEM_MC_FETCH_GREG_U16_SX_U64(a_u64Dst, a_iGReg) \
    10884     off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u64Dst, a_iGReg, sizeof(uint64_t))
    10885 
    10886 /** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
    10887 DECL_INLINE_THROW(uint32_t)
    10888 iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
    10889 {
    10890     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    10891     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
    10892     Assert(iGReg < 16);
    10893 
    10894     /*
    10895      * We can either just load the low 16 bits of the GPR into a host register
    10896      * for the variable, or we can do so via a shadow copy host register. The
    10897      * latter will avoid having to reload it if it's being stored later, but
    10898      * will waste a host register if it isn't touched again.  Since we don't
    10899      * know what's going to happen, we choose the latter for now.
    10900      */
    10901     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    10902                                                                   kIemNativeGstRegUse_ReadOnly);
    10903 
    10904     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    10905     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    10906     if (cbSignExtended == sizeof(uint32_t))
    10907         off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
    10908     else
    10909     {
    10910         Assert(cbSignExtended == sizeof(uint64_t));
    10911         off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
    10912     }
    10913     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    10914 
    10915     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    10916     return off;
    10917 }
    10918 
    10919 
    10920 #define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
    10921     off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
    10922 
    10923 #define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u64Dst, a_iGReg) \
    10924     off = iemNativeEmitFetchGregU32(pReNative, off, a_u64Dst, a_iGReg, sizeof(uint64_t))
    10925 
    10926 /** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
    10927 DECL_INLINE_THROW(uint32_t)
    10928 iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
    10929 {
    10930     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    10931     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
    10932     Assert(iGReg < 16);
    10933 
    10934     /*
    10935      * We can either just load the low 32 bits of the GPR into a host register
    10936      * for the variable, or we can do so via a shadow copy host register. The
    10937      * latter will avoid having to reload it if it's being stored later, but
    10938      * will waste a host register if it isn't touched again.  Since we don't
    10939      * know what's going to happen, we choose the latter for now.
    10940      */
    10941     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    10942                                                                   kIemNativeGstRegUse_ReadOnly);
    10943 
    10944     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    10945     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    10946     off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
    10947     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    10948 
    10949     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    10950     return off;
    10951 }
    10952 
    10953 
    10954 #define IEM_MC_FETCH_GREG_U32_SX_U64(a_u64Dst, a_iGReg) \
    10955     off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u64Dst, a_iGReg)
    10956 
    10957 /** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
    10958 DECL_INLINE_THROW(uint32_t)
    10959 iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
    10960 {
    10961     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    10962     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
    10963     Assert(iGReg < 16);
    10964 
    10965     /*
    10966      * We can either just load the low 32 bits of the GPR into a host register
    10967      * for the variable, or we can do so via a shadow copy host register. The
    10968      * latter will avoid having to reload it if it's being stored later, but
    10969      * will waste a host register if it isn't touched again.  Since we don't
    10970      * know what's going to happen, we choose the latter for now.
    10971      */
    10972     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    10973                                                                   kIemNativeGstRegUse_ReadOnly);
    10974 
    10975     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    10976     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    10977     off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
    10978     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    10979 
    10980     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    10981     return off;
    10982 }
    10983 
    10984 
    10985 #define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
    10986     off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
    10987 
    10988 #define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
    10989     off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
    10990 
    10991 /** Emits code for IEM_MC_FETCH_GREG_U64 (and the
    10992  *  IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
    10993 DECL_INLINE_THROW(uint32_t)
    10994 iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
    10995 {
    10996     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    10997     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
    10998     Assert(iGReg < 16);
    10999 
    11000     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    11001                                                                   kIemNativeGstRegUse_ReadOnly);
    11002 
    11003     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    11004     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    11005     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
    11006     /** @todo name the register a shadow one already? */
    11007     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    11008 
    11009     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    11010     return off;
    11011 }
    11012 
    11013 
    11014 
    11015 /*********************************************************************************************************************************
    11016 *   Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX).                                                        *
    11017 *********************************************************************************************************************************/
    11018 
    11019 #define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
    11020     off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
    11021 
    11022 /** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
    11023 DECL_INLINE_THROW(uint32_t)
    11024 iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
    11025 {
    11026     Assert(iGRegEx < 20);
    11027     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    11028                                                                  kIemNativeGstRegUse_ForUpdate);
    11029 #ifdef RT_ARCH_AMD64
    11030     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
    11031 
    11032     /* To the lowest byte of the register: mov r8, imm8 */
    11033     if (iGRegEx < 16)
    11034     {
    11035         if (idxGstTmpReg >= 8)
    11036             pbCodeBuf[off++] = X86_OP_REX_B;
    11037         else if (idxGstTmpReg >= 4)
    11038             pbCodeBuf[off++] = X86_OP_REX;
    11039         pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
    11040         pbCodeBuf[off++] = u8Value;
    11041     }
    11042     /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
    11043     else if (idxGstTmpReg < 4)
    11044     {
    11045         pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
    11046         pbCodeBuf[off++] = u8Value;
    11047     }
    11048     else
    11049     {
    11050         /* ror reg64, 8 */
    11051         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    11052         pbCodeBuf[off++] = 0xc1;
    11053         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    11054         pbCodeBuf[off++] = 8;
    11055 
    11056         /* mov reg8, imm8  */
    11057         if (idxGstTmpReg >= 8)
    11058             pbCodeBuf[off++] = X86_OP_REX_B;
    11059         else if (idxGstTmpReg >= 4)
    11060             pbCodeBuf[off++] = X86_OP_REX;
    11061         pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
    11062         pbCodeBuf[off++] = u8Value;
    11063 
    11064         /* rol reg64, 8 */
    11065         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    11066         pbCodeBuf[off++] = 0xc1;
    11067         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    11068         pbCodeBuf[off++] = 8;
    11069     }
    11070 
    11071 #elif defined(RT_ARCH_ARM64)
    11072     uint8_t const    idxImmReg   = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
    11073     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    11074     if (iGRegEx < 16)
    11075         /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
    11076         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
    11077     else
    11078         /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
    11079         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
    11080     iemNativeRegFreeTmp(pReNative, idxImmReg);
    11081 
    11082 #else
    11083 # error "Port me!"
    11084 #endif
    11085 
    11086     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11087 
    11088     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
    11089 
    11090     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    11091     return off;
    11092 }
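          /* Illustration (not part of the emitted code): storing 0x42 into a
             guest high-byte register (AH..BH) whose shadow lives in a host
             register without an addressable high byte, say r9, uses the rotate
             trick from the else branch above:
                 ror r9, 8        ; bring bits 15:8 down to 7:0
                 mov r9b, 0x42    ; patch the byte
                 rol r9, 8        ; rotate it back into place */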
    11093 
    11094 
    11095 #define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
    11096     off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
    11097 
    11098 /** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
    11099 DECL_INLINE_THROW(uint32_t)
    11100 iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
    11101 {
    11102     Assert(iGRegEx < 20);
    11103     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    11104 
    11105     /*
    11106      * If it's a constant value (unlikely), we treat this as an
    11107      * IEM_MC_STORE_GREG_U8_CONST statement.
    11108      */
    11109     PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
    11110     if (pValueVar->enmKind == kIemNativeVarKind_Stack)
    11111     { /* likely */ }
    11112     else
    11113     {
    11114         AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
    11115                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    11116         return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
    11117     }
    11118 
    11119     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    11120                                                                  kIemNativeGstRegUse_ForUpdate);
    11121     uint8_t const    idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
    11122 
    11123 #ifdef RT_ARCH_AMD64
    11124     /* To the lowest byte of the register: mov reg8, reg8(r/m) */
    11125     if (iGRegEx < 16)
    11126     {
    11127         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    11128         if (idxGstTmpReg >= 8 || idxVarReg >= 8)
    11129             pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
    11130         else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
    11131             pbCodeBuf[off++] = X86_OP_REX;
    11132         pbCodeBuf[off++] = 0x8a;
    11133         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
    11134     }
    11135     /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
    11136     else if (idxGstTmpReg < 4 && idxVarReg < 4)
    11137     {
    11138         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
    11139         pbCodeBuf[off++] = 0x8a;
    11140         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
    11141     }
    11142     else
    11143     {
    11144         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
    11145 
    11146         /* ror reg64, 8 */
    11147         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    11148         pbCodeBuf[off++] = 0xc1;
    11149         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    11150         pbCodeBuf[off++] = 8;
    11151 
    11152         /* mov reg8, reg8(r/m)  */
    11153         if (idxGstTmpReg >= 8 || idxVarReg >= 8)
    11154             pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
    11155         else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
    11156             pbCodeBuf[off++] = X86_OP_REX;
    11157         pbCodeBuf[off++] = 0x8a;
    11158         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
    11159 
    11160         /* rol reg64, 8 */
    11161         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    11162         pbCodeBuf[off++] = 0xc1;
    11163         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    11164         pbCodeBuf[off++] = 8;
    11165     }
    11166 
    11167 #elif defined(RT_ARCH_ARM64)
    11168     /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
    11169             or
    11170        bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
    11171     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11172     if (iGRegEx < 16)
    11173         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
    11174     else
    11175         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
    11176 
    11177 #else
    11178 # error "Port me!"
    11179 #endif
    11180     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11181 
    11182     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    11183 
    11184     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
    11185     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    11186     return off;
    11187 }
    11188 
    11189 
    11190 
    11191 #define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
    11192     off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
    11193 
    11194 /** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
    11195 DECL_INLINE_THROW(uint32_t)
    11196 iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
    11197 {
    11198     Assert(iGReg < 16);
    11199     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    11200                                                                  kIemNativeGstRegUse_ForUpdate);
    11201 #ifdef RT_ARCH_AMD64
    11202     /* mov reg16, imm16 */
    11203     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    11204     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    11205     if (idxGstTmpReg >= 8)
    11206         pbCodeBuf[off++] = X86_OP_REX_B;
    11207     pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
    11208     pbCodeBuf[off++] = RT_BYTE1(uValue);
    11209     pbCodeBuf[off++] = RT_BYTE2(uValue);
    11210 
    11211 #elif defined(RT_ARCH_ARM64)
    11212     /* movk xdst, #uValue, lsl #0 */
    11213     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11214     pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
    11215 
    11216 #else
    11217 # error "Port me!"
    11218 #endif
    11219 
    11220     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11221 
    11222     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    11223     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    11224     return off;
    11225 }
    11226 
    11227 
    11228 #define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
    11229     off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
    11230 
    11231 /** Emits code for IEM_MC_STORE_GREG_U16. */
    11232 DECL_INLINE_THROW(uint32_t)
    11233 iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
    11234 {
    11235     Assert(iGReg < 16);
    11236     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    11237 
    11238     /*
    11239      * If it's a constant value (unlikely), we treat this as an
    11240      * IEM_MC_STORE_GREG_U16_CONST statement.
    11241      */
    11242     PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
    11243     if (pValueVar->enmKind == kIemNativeVarKind_Stack)
    11244     { /* likely */ }
    11245     else
    11246     {
    11247         AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
    11248                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    11249         return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
    11250     }
    11251 
    11252     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    11253                                                                  kIemNativeGstRegUse_ForUpdate);
    11254 
    11255 #ifdef RT_ARCH_AMD64
    11256     /* mov reg16, reg16 or [mem16] */
    11257     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
    11258     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    11259     if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    11260     {
    11261         if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
    11262             pbCodeBuf[off++] = (idxGstTmpReg      >= 8 ? X86_OP_REX_R : 0)
    11263                              | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
    11264         pbCodeBuf[off++] = 0x8b;
    11265         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
    11266     }
    11267     else
    11268     {
    11269         uint8_t const idxStackSlot = pValueVar->idxStackSlot;
    11270         AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    11271         if (idxGstTmpReg >= 8)
    11272             pbCodeBuf[off++] = X86_OP_REX_R;
    11273         pbCodeBuf[off++] = 0x8b;
    11274         off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
    11275     }
    11276 
    11277 #elif defined(RT_ARCH_ARM64)
    11278     /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
    11279     uint8_t const    idxVarReg   = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
    11280     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11281     pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
    11282     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    11283 
    11284 #else
    11285 # error "Port me!"
    11286 #endif
    11287 
    11288     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11289 
    11290     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    11291     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    11292     return off;
    11293 }
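          /* Note: the AMD64 path above deliberately does not force the value
             variable into a host register; when it only lives in a stack slot,
             the 16-bit mov loads it directly from [rbp+disp] instead, which is
             why only the ARM64 path calls iemNativeVarRegisterAcquire(). */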
    11294 
    11295 
    11296 #define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
    11297     off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
    11298 
    11299 /** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
    11300 DECL_INLINE_THROW(uint32_t)
    11301 iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
    11302 {
    11303     Assert(iGReg < 16);
    11304     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    11305                                                                  kIemNativeGstRegUse_ForFullWrite);
    11306     off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
    11307     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    11308     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    11309     return off;
    11310 }
    11311 
    11312 
    11313 #define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
    11314     off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
    11315 
    11316 /** Emits code for IEM_MC_STORE_GREG_U32. */
    11317 DECL_INLINE_THROW(uint32_t)
    11318 iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
    11319 {
    11320     Assert(iGReg < 16);
    11321     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    11322 
    11323     /*
    11324      * If it's a constant value (unlikely), we treat this as an
    11325      * IEM_MC_STORE_GREG_U32_CONST statement.
    11326      */
    11327     PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
    11328     if (pValueVar->enmKind == kIemNativeVarKind_Stack)
    11329     { /* likely */ }
    11330     else
    11331     {
    11332         AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
    11333                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    11334         return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
    11335     }
    11336 
    11337     /*
    11338      * For the rest we allocate a guest register for the variable and write
    11339      * it to the CPUMCTX structure.
    11340      */
    11341     uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
    11342     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    11343 #ifdef VBOX_STRICT
    11344     off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
    11345 #endif
    11346     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    11347     return off;
    11348 }
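          /* Note: the full 64-bit write-back above is safe because a 32-bit
             variable is kept zero-extended in its host register (32-bit
             operations clear bits 63:32 on both AMD64 and ARM64); strict builds
             assert exactly that via iemNativeEmitTop32BitsClearCheck(). */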
    11349 
    11350 
    11351 #define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
    11352     off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
    11353 
    11354 /** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
    11355 DECL_INLINE_THROW(uint32_t)
    11356 iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
    11357 {
    11358     Assert(iGReg < 16);
    11359     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    11360                                                                  kIemNativeGstRegUse_ForFullWrite);
    11361     off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
    11362     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    11363     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    11364     return off;
    11365 }
    11366 
    11367 
    11368 #define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
    11369     off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
    11370 
    11371 /** Emits code for IEM_MC_STORE_GREG_U64. */
    11372 DECL_INLINE_THROW(uint32_t)
    11373 iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
    11374 {
    11375     Assert(iGReg < 16);
    11376     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    11377 
    11378     /*
    11379      * If it's a constant value (unlikely), we treat this as an
    11380      * IEM_MC_STORE_GREG_U64_CONST statement.
    11381      */
    11382     PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
    11383     if (pValueVar->enmKind == kIemNativeVarKind_Stack)
    11384     { /* likely */ }
    11385     else
    11386     {
    11387         AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
    11388                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    11389         return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
    11390     }
    11391 
    11392     /*
    11393      * For the rest we allocate a guest register for the variable and write
    11394      * it to the CPUMCTX structure.
    11395      */
    11396     uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
    11397     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    11398     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    11399     return off;
    11400 }
    11401 
    11402 
    11403 #define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
    11404     off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
    11405 
    11406 /** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
    11407 DECL_INLINE_THROW(uint32_t)
    11408 iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
    11409 {
    11410     Assert(iGReg < 16);
    11411     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    11412                                                                  kIemNativeGstRegUse_ForUpdate);
    11413     off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
    11414     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    11415     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    11416     return off;
    11417 }
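          /* Note: copying the register onto itself as a 32-bit operation is the
             whole trick here; a 32-bit register write implicitly zeroes bits
             63:32 on both AMD64 (mov r32, r32) and ARM64 (mov wN, wM), so no
             explicit masking is needed. */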
    11418 
    11419 
    11420 /*********************************************************************************************************************************
    11421 *   General purpose register manipulation (add, sub).                                                                            *
    11422 *********************************************************************************************************************************/
    11423 
    11424 #define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
    11425     off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
    11426 
    11427 /** Emits code for IEM_MC_ADD_GREG_U16. */
    11428 DECL_INLINE_THROW(uint32_t)
    11429 iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
    11430 {
    11431     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    11432                                                                  kIemNativeGstRegUse_ForUpdate);
    11433 
    11434 #ifdef RT_ARCH_AMD64
    11435     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
    11436     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    11437     if (idxGstTmpReg >= 8)
    11438         pbCodeBuf[off++] = X86_OP_REX_B;
    11439     if (uAddend == 1)
    11440     {
    11441         pbCodeBuf[off++] = 0xff; /* inc */
    11442         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    11443     }
    11444     else
    11445     {
    11446         pbCodeBuf[off++] = 0x81;
    11447         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    11448         pbCodeBuf[off++] = uAddend;
    11449         pbCodeBuf[off++] = 0;
    11450     }
    11451 
    11452 #else
    11453     uint8_t const    idxTmpReg   = iemNativeRegAllocTmp(pReNative, &off);
    11454     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    11455 
    11456     /* add tmp, gstgrp, uAddend */
    11457     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
    11458 
    11459     /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
    11460     pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
    11461 
    11462     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    11463 #endif
    11464 
    11465     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11466 
    11467     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    11468 
    11469     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    11470     return off;
    11471 }
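          /* Illustration (not part of the emitted code): for guest CX shadowed
             in host rcx, the AMD64 path above produces
                 66 ff c1             inc cx          ; uAddend == 1
                 66 81 c1 05 00       add cx, 5       ; any other uAddend
             while the ARM64 path adds into a scratch register and uses bfi to
             merge only bits 15:0 back, leaving the rest of the GPR untouched. */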
    11472 
    11473 
    11474 #define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
    11475     off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
    11476 
    11477 #define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
    11478     off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
    11479 
    11480 /** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
    11481 DECL_INLINE_THROW(uint32_t)
    11482 iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
    11483 {
    11484     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    11485                                                                  kIemNativeGstRegUse_ForUpdate);
    11486 
    11487 #ifdef RT_ARCH_AMD64
    11488     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    11489     if (f64Bit)
    11490         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
    11491     else if (idxGstTmpReg >= 8)
    11492         pbCodeBuf[off++] = X86_OP_REX_B;
    11493     if (uAddend == 1)
    11494     {
    11495         pbCodeBuf[off++] = 0xff; /* inc */
    11496         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    11497     }
    11498     else if (uAddend < 128)
    11499     {
    11500         pbCodeBuf[off++] = 0x83; /* add */
    11501         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    11502         pbCodeBuf[off++] = RT_BYTE1(uAddend);
    11503     }
    11504     else
    11505     {
    11506         pbCodeBuf[off++] = 0x81; /* add */
    11507         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    11508         pbCodeBuf[off++] = RT_BYTE1(uAddend);
    11509         pbCodeBuf[off++] = 0;
    11510         pbCodeBuf[off++] = 0;
    11511         pbCodeBuf[off++] = 0;
    11512     }
    11513 
    11514 #else
    11515     /* add gstgrp, gstgrp, uAddend */
    11516     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11517     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
    11518 
    11519 #endif
    11520 
    11521     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11522 
    11523     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    11524 
    11525     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    11526     return off;
    11527 }
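          /* The uAddend < 128 split above follows the x86 encoding rules:
             opcode 0x83 takes a sign-extended imm8, so it is only usable while
             the addend fits a positive signed byte, whereas 0x81 spends a full
             imm32, e.g. (illustration only):
                 83 c0 7f             add eax, 127
                 81 c0 80 00 00 00    add eax, 128 */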
    11528 
    11529 
    11530 
    11531 #define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
    11532     off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
    11533 
    11534 /** Emits code for IEM_MC_SUB_GREG_U16. */
    11535 DECL_INLINE_THROW(uint32_t)
    11536 iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
    11537 {
    11538     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    11539                                                                  kIemNativeGstRegUse_ForUpdate);
    11540 
    11541 #ifdef RT_ARCH_AMD64
    11542     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
    11543     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    11544     if (idxGstTmpReg >= 8)
    11545         pbCodeBuf[off++] = X86_OP_REX_B;
    11546     if (uSubtrahend == 1)
    11547     {
    11548         pbCodeBuf[off++] = 0xff; /* dec */
    11549         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    11550     }
    11551     else
    11552     {
    11553         pbCodeBuf[off++] = 0x81;
    11554         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
    11555         pbCodeBuf[off++] = uSubtrahend;
    11556         pbCodeBuf[off++] = 0;
    11557     }
    11558 
    11559 #else
    11560     uint8_t const    idxTmpReg   = iemNativeRegAllocTmp(pReNative, &off);
    11561     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    11562 
    11563     /* sub tmp, gstgrp, uSubtrahend */
    11564     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
    11565 
    11566     /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
    11567     pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
    11568 
    11569     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    11570 #endif
    11571 
    11572     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11573 
    11574     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    11575 
    11576     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    11577     return off;
    11578 }
    11579 
    11580 
    11581 #define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
    11582     off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
    11583 
    11584 #define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
    11585     off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
    11586 
    11587 /** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
    11588 DECL_INLINE_THROW(uint32_t)
    11589 iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
    11590 {
    11591     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    11592                                                                  kIemNativeGstRegUse_ForUpdate);
    11593 
    11594 #ifdef RT_ARCH_AMD64
    11595     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    11596     if (f64Bit)
    11597         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
    11598     else if (idxGstTmpReg >= 8)
    11599         pbCodeBuf[off++] = X86_OP_REX_B;
    11600     if (uSubtrahend == 1)
    11601     {
    11602         pbCodeBuf[off++] = 0xff; /* dec */
    11603         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    11604     }
    11605     else if (uSubtrahend < 128)
    11606     {
    11607         pbCodeBuf[off++] = 0x83; /* sub */
    11608         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
    11609         pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
    11610     }
    11611     else
    11612     {
    11613         pbCodeBuf[off++] = 0x81; /* sub */
    11614         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
    11615         pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
    11616         pbCodeBuf[off++] = 0;
    11617         pbCodeBuf[off++] = 0;
    11618         pbCodeBuf[off++] = 0;
    11619     }
    11620 
    11621 #else
    11622     /* sub tmp, gstgrp, uSubtrahend */
    11623     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11624     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
    11625 
    11626 #endif
    11627 
    11628     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11629 
    11630     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    11631 
    11632     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    11633     return off;
    11634 }
    11635 
    11636 
    11637 /*********************************************************************************************************************************
    11638 *   Local variable manipulation (add, sub, and, or).                                                                             *
    11639 *********************************************************************************************************************************/
    11640 
    11641 #define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
    11642     off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
    11643 
    11644 #define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
    11645     off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
    11646 
    11647 #define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
    11648     off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
    11649 
    11650 #define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
    11651     off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
    11652 
    11653 /** Emits code for AND'ing a local and a constant value.   */
    11654 DECL_INLINE_THROW(uint32_t)
    11655 iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
    11656 {
    11657 #ifdef VBOX_STRICT
    11658     switch (cbMask)
    11659     {
    11660         case sizeof(uint8_t):  Assert((uint8_t)uMask  == uMask); break;
    11661         case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
    11662         case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
    11663         case sizeof(uint64_t): break;
    11664         default: AssertFailedBreak();
    11665     }
    11666 #endif
    11667 
    11668     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
    11669     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
    11670 
    11671     if (cbMask <= sizeof(uint32_t))
    11672         off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
    11673     else
    11674         off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
    11675 
    11676     iemNativeVarRegisterRelease(pReNative, idxVar);
    11677     return off;
    11678 }
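The VBOX_STRICT switch above merely asserts that the constant fits the local's width; an illustrative MC fragment (assumed usage, not from the source):

    IEM_MC_LOCAL(uint16_t, u16Value);                 /* a 16-bit local */
    IEM_MC_AND_LOCAL_U16(u16Value, UINT16_C(0x0fff)); /* ok: (uint16_t)0x0fff == 0x0fff */
    /* A mask like 0x1ffff here would trip the strict assertion above. */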
    11679 
    11680 
    11681 #define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
    11682     off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
    11683 
    11684 #define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
    11685     off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
    11686 
    11687 #define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
    11688     off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
    11689 
    11690 #define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
    11691     off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
    11692 
    11693 /** Emits code for OR'ing a local and a constant value.   */
    11694 DECL_INLINE_THROW(uint32_t)
    11695 iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
    11696 {
    11697 #ifdef VBOX_STRICT
    11698     switch (cbMask)
    11699     {
    11700         case sizeof(uint8_t):  Assert((uint8_t)uMask  == uMask); break;
    11701         case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
    11702         case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
    11703         case sizeof(uint64_t): break;
    11704         default: AssertFailedBreak();
    11705     }
    11706 #endif
    11707 
    11708     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
    11709     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
    11710 
    11711     if (cbMask <= sizeof(uint32_t))
    11712         off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
    11713     else
    11714         off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
    11715 
    11716     iemNativeVarRegisterRelease(pReNative, idxVar);
    11717     return off;
    11718 }
    11719 
    11720 
    11721 #define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
    11722     off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
    11723 
    11724 #define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
    11725     off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
    11726 
    11727 #define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
    11728     off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
    11729 
    11730 /** Emits code for reversing the byte order in a local value.   */
    11731 DECL_INLINE_THROW(uint32_t)
    11732 iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
    11733 {
    11734     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
    11735     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
    11736 
    11737     switch (cbLocal)
    11738     {
    11739         case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
    11740         case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
    11741         case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg);   break;
    11742         default: AssertFailedBreak();
    11743     }
    11744 
    11745     iemNativeVarRegisterRelease(pReNative, idxVar);
    11746     return off;
    11747 }
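The three widths map onto bswap/rev style instructions via the iemNativeEmitBswapGpr* emitters; the value semantics, as a plain C sketch (given uint16_t u16 and uint32_t u32 locals):

    /* Reference semantics for IEM_MC_BSWAP_LOCAL_U16/U32 (sketch, not emitted code). */
    uint16_t const u16Swapped = (uint16_t)((uint16_t)(u16 << 8) | (u16 >> 8));    /* 0x1234 -> 0x3412 */
    uint32_t const u32Swapped =   (u32 << 24) | ((u32 & UINT32_C(0xff00)) << 8)
                                | ((u32 >> 8) & UINT32_C(0xff00)) | (u32 >> 24);  /* 0x11223344 -> 0x44332211 */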
    11748 
    11749 
    11750 
    11751 /*********************************************************************************************************************************
    11752 *   EFLAGS                                                                                                                       *
    11753 *********************************************************************************************************************************/
    11754 
    11755 #if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
    11756 # define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput)     ((void)0)
    11757 #else
    11758 # define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
    11759     iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
    11760 
    11761 DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
    11762 {
    11763     if (fEflOutput)
    11764     {
    11765         PVMCPUCC const pVCpu = pReNative->pVCpu;
    11766 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    11767         IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
    11768         IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
    11769         AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
    11770 #  define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
    11771             if (fEflOutput & (a_fEfl)) \
    11772             { \
    11773                 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
    11774                     STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
    11775                 else \
    11776                     STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
    11777             } else do { } while (0)
    11778 # else
    11779         PCIEMLIVENESSENTRY const pLivenessEntry       = &pReNative->paLivenessEntries[pReNative->idxCurCall];
    11780         IEMLIVENESSBIT const     LivenessClobbered    =
    11781         {
    11782               pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
    11783             & ~(  pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
    11784                 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
    11785                 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
    11786         };
    11787         IEMLIVENESSBIT const     LivenessDelayable =
    11788         {
    11789               pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
    11790             & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
    11791             & ~(  pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
    11792                 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
    11793         };
    11794 #  define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
    11795             if (fEflOutput & (a_fEfl)) \
    11796             { \
    11797                 if (LivenessClobbered.a_fLivenessMember) \
    11798                     STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
    11799                 else if (LivenessDelayable.a_fLivenessMember) \
    11800                     STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
    11801                 else \
    11802                     STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
    11803             } else do { } while (0)
    11804 # endif
    11805         CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
    11806         CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
    11807         CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
    11808         CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
    11809         CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
    11810         CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
    11811         //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
    11812 # undef CHECK_FLAG_AND_UPDATE_STATS
    11813     }
    11814     RT_NOREF(fEflInput);
    11815 }
    11816 #endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
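To make the extended-layout masks above concrete: a flag counts as skippable when it is written but neither read, potentially needed across an exception/call, nor otherwise used, and as delayable when it is written and only potentially needed. A sketch with hypothetical parameter names:

    /* Sketch mirroring the LivenessClobbered / LivenessDelayable computations above. */
    static void iemSketchClassifyLiveness(uint64_t fWrite, uint64_t fRead, uint64_t fPotXcptOrCall,
                                          uint64_t fOther, uint64_t *pfClobbered, uint64_t *pfDelayable)
    {
        *pfClobbered = fWrite & ~(fRead | fPotXcptOrCall | fOther);  /* safe to skip entirely */
        *pfDelayable = fWrite & fPotXcptOrCall & ~(fRead | fOther);  /* only needed on xcpt/call paths */
    }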
    11817 
    11818 #undef  IEM_MC_FETCH_EFLAGS /* should not be used */
    11819 #define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
    11820     off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
    11821 
    11822 /** Handles IEM_MC_FETCH_EFLAGS_EX. */
    11823 DECL_INLINE_THROW(uint32_t)
    11824 iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
    11825                          uint32_t fEflInput, uint32_t fEflOutput)
    11826 {
    11827     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
    11828     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
    11829     RT_NOREF(fEflInput, fEflOutput);
    11830 
    11831 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    11832 # ifdef VBOX_STRICT
    11833     if (   pReNative->idxCurCall != 0
    11834         && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
    11835     {
    11836         PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
    11837         uint32_t const           fBoth          = fEflInput | fEflOutput;
    11838 # define ASSERT_ONE_EFL(a_fEflConst, a_idxField) \
    11839             AssertMsg(   !(fBoth & (a_fEflConst)) \
    11840                       || (!(fEflInput & (a_fEflConst)) \
    11841                           ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
    11842                           : !(fEflOutput & (a_fEflConst)) \
    11843                           ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED(  iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
    11844                           : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
    11845                       ("%s - %u\n", #a_fEflConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
    11846         ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
    11847         ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
    11848         ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
    11849         ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
    11850         ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
    11851         ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
    11852         ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
    11853 # undef ASSERT_ONE_EFL
    11854     }
    11855 # endif
    11856 #endif
    11857 
    11858     /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
    11859      *        the existing shadow copy. */
    11860     uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
    11861     iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
    11862     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
    11863     iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
    11864     return off;
    11865 }
    11866 
    11867 
    11868 
    11869 /** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
    11870  * start using it with custom native code emission (inlining assembly
    11871  * instruction helpers). */
    11872 #undef  IEM_MC_COMMIT_EFLAGS /* should not be used */
    11873 #define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
    11874     IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
    11875     off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
    11876 
    11877 /** Handles IEM_MC_COMMIT_EFLAGS_EX. */
    11878 DECL_INLINE_THROW(uint32_t)
    11879 iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
    11880 {
    11881     RT_NOREF(fEflOutput);
    11882     uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
    11883     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
    11884 
    11885 #ifdef VBOX_STRICT
    11886     off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
    11887     uint32_t offFixup = off;
    11888     off = iemNativeEmitJnzToFixed(pReNative, off, off);
    11889     off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
    11890     iemNativeFixupFixedJump(pReNative, offFixup, off);
    11891 
    11892     off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
    11893     offFixup = off;
    11894     off = iemNativeEmitJzToFixed(pReNative, off, off);
    11895     off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
    11896     iemNativeFixupFixedJump(pReNative, offFixup, off);
    11897 
    11898     /** @todo validate that only bits in the fEflOutput mask changed. */
    11899 #endif
    11900 
    11901     iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
    11902     off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
    11903     iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
    11904     return off;
    11905 }
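The two strict checks above break (brk 0x2001 / 0x2002) when the committed value violates the architectural constant bits; expressed as a plain assertion over the value being committed (fEfl is a hypothetical name), the invariant is roughly:

    /* Invariant enforced by the emitted strict checks (sketch). */
    Assert(   (fEfl & X86_EFL_RA1_MASK)                                 /* reserved-as-1 bit set */
           && !(fEfl & (X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32))); /* reserved-as-0 bits clear */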
    11906 
    11907 
    11908 
    11909 /*********************************************************************************************************************************
    11910 *   Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
    11911 *********************************************************************************************************************************/
    11912 
    11913 #define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
    11914     off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
    11915 
    11916 #define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
    11917     off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
    11918 
    11919 #define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
    11920     off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
    11921 
    11922 
    11923 /** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
    11924  *  IEM_MC_FETCH_SREG_ZX_U64. */
    11925 DECL_INLINE_THROW(uint32_t)
    11926 iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
    11927 {
    11928     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    11929     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
    11930     Assert(iSReg < X86_SREG_COUNT);
    11931 
    11932     /*
    11933      * For now, we will not create a shadow copy of a selector.  The rationale
    11934      * is that since we do not recompile the popping and loading of segment
    11935      * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
    11936      * pushing and moving to registers, there is only a small chance that the
    11937      * shadow copy will be accessed again before the register is reloaded.  One
    11938      * scenario would be nested calls in 16-bit code, but I doubt it's worth
    11939      * the extra register pressure atm.
    11940      *
    11941      * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
    11942      * and iemNativeVarRegisterAcquire for a load scenario. We only have the
    11943      * store scenario covered at present (r160730).
    11944      */
    11945     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    11946     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    11947     off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
    11948     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    11949     return off;
    11950 }
    11951 
    11952 
    11953 
    11954 /*********************************************************************************************************************************
    11955 *   Register references.                                                                                                         *
    11956 *********************************************************************************************************************************/
    11957 
    11958 #define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
    11959     off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
    11960 
    11961 #define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
    11962     off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
    11963 
    11964 /** Handles IEM_MC_REF_GREG_U8[_CONST]. */
    11965 DECL_INLINE_THROW(uint32_t)
    11966 iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
    11967 {
    11968     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
    11969     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
    11970     Assert(iGRegEx < 20);
    11971 
    11972     if (iGRegEx < 16)
    11973         iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
    11974     else
    11975         iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
    11976 
    11977     /* If we've delayed writing back the register value, flush it now. */
    11978     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
    11979 
    11980     /* If it's not a const reference we need to flush the shadow copy of the register now. */
    11981     if (!fConst)
    11982         iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
    11983 
    11984     return off;
    11985 }
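For orientation, iGRegEx values 16 thru 19 select the legacy high-byte registers (AH, CH, DH, BH), which is why the reference kind switches to kIemNativeGstRegRef_GprHighByte; a sketch of the byte such a reference designates (hypothetical helper):

    /* Sketch: which byte of the 64-bit GPR value an iGRegEx refers to. */
    static uint8_t iemSketchFetchGReg8Ex(uint8_t iGRegEx, uint64_t const auGRegs[16])
    {
        if (iGRegEx < 16)
            return (uint8_t)auGRegs[iGRegEx];          /* AL..R15B: bits 7:0 */
        return (uint8_t)(auGRegs[iGRegEx & 15] >> 8);  /* AH/CH/DH/BH: bits 15:8 of xAX..xBX */
    }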
    11986 
    11987 #define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
    11988     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
    11989 
    11990 #define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
    11991     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
    11992 
    11993 #define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
    11994     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
    11995 
    11996 #define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
    11997     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
    11998 
    11999 #define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
    12000     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
    12001 
    12002 #define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
    12003     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
    12004 
    12005 #define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
    12006     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
    12007 
    12008 #define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
    12009     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
    12010 
    12011 #define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
    12012     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
    12013 
    12014 #define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
    12015     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
    12016 
    12017 /** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
    12018 DECL_INLINE_THROW(uint32_t)
    12019 iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
    12020 {
    12021     Assert(iGReg < 16);
    12022     iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
    12023     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
    12024 
    12025     /* If we've delayed writing back the register value, flush it now. */
    12026     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
    12027 
    12028     /* If it's not a const reference we need to flush the shadow copy of the register now. */
    12029     if (!fConst)
    12030         iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
    12031 
    12032     return off;
    12033 }
    12034 
    12035 
    12036 #undef  IEM_MC_REF_EFLAGS /* should not be used. */
    12037 #define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
    12038     IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
    12039     off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
    12040 
    12041 /** Handles IEM_MC_REF_EFLAGS. */
    12042 DECL_INLINE_THROW(uint32_t)
    12043 iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
    12044 {
    12045     iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
    12046     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
    12047 
    12048     /* If we've delayed writing back the register value, flush it now. */
    12049     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
    12050 
    12051     /* If there is a shadow copy of guest EFLAGS, flush it now. */
    12052     iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
    12053 
    12054     return off;
    12055 }
    12056 
    12057 
    12058 /** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds?  Once we emit
    12059  * different code from the threaded recompiler, maybe it would be helpful. For now
    12060  * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
    12061 #define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
    12062 
    12063 
    12064 #define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
    12065     off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
    12066 
    12067 #define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
    12068     off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
    12069 
    12070 #define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
    12071     off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
    12072 
    12073 /** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
    12074 DECL_INLINE_THROW(uint32_t)
    12075 iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
    12076 {
    12077     Assert(iXReg < 16);
    12078     iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
    12079     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
    12080 
    12081     /* If we've delayed writing back the register value, flush it now. */
    12082     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
    12083 
    12084 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    12085     /* If it's not a const reference we need to flush the shadow copy of the register now. */
    12086     if (!fConst)
    12087         iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
    12088 #else
    12089     RT_NOREF(fConst);
    12090 #endif
    12091 
    12092     return off;
    12093 }
    12094 
    12095 
    12096 #define IEM_MC_REF_MXCSR(a_pfMxcsr) \
    12097     off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
    12098 
    12099 /** Handles IEM_MC_REF_MXCSR. */
    12100 DECL_INLINE_THROW(uint32_t)
    12101 iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
    12102 {
    12103     iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
    12104     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
    12105 
    12106     /* If we've delayed writing back the register value, flush it now. */
    12107     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
    12108 
    12109     /* If there is a shadow copy of guest MXCSR, flush it now. */
    12110     iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
    12111 
    12112     return off;
    12113 }
    12114 
    12115 
    12116 
    12117 /*********************************************************************************************************************************
    12118 *   Effective Address Calculation                                                                                                *
    12119 *********************************************************************************************************************************/
    12120 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
    12121     off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
    12122 
    12123 /** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
    12124  * @sa iemOpHlpCalcRmEffAddrThreadedAddr16  */
    12125 DECL_INLINE_THROW(uint32_t)
    12126 iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    12127                                          uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
    12128 {
    12129     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
    12130 
    12131     /*
    12132      * Handle the disp16 form with no registers first.
    12133      *
    12134      * Convert to an immediate value, as that'll delay the register allocation
    12135      * and assignment till the memory access / call / whatever and we can use
    12136      * a more appropriate register (or none at all).
    12137      */
    12138     if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
    12139     {
    12140         iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
    12141         return off;
    12142     }
    12143 
    12144     /* Determine the displacement. */
    12145     uint16_t u16EffAddr;
    12146     switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    12147     {
    12148         case 0:  u16EffAddr = 0;                        break;
    12149         case 1:  u16EffAddr = (int16_t)(int8_t)u16Disp; break;
    12150         case 2:  u16EffAddr = u16Disp;                  break;
    12151         default: AssertFailedStmt(u16EffAddr = 0);
    12152     }
    12153 
    12154     /* Determine the registers involved. */
    12155     uint8_t idxGstRegBase;
    12156     uint8_t idxGstRegIndex;
    12157     switch (bRm & X86_MODRM_RM_MASK)
    12158     {
    12159         case 0:
    12160             idxGstRegBase  = X86_GREG_xBX;
    12161             idxGstRegIndex = X86_GREG_xSI;
    12162             break;
    12163         case 1:
    12164             idxGstRegBase  = X86_GREG_xBX;
    12165             idxGstRegIndex = X86_GREG_xDI;
    12166             break;
    12167         case 2:
    12168             idxGstRegBase  = X86_GREG_xBP;
    12169             idxGstRegIndex = X86_GREG_xSI;
    12170             break;
    12171         case 3:
    12172             idxGstRegBase  = X86_GREG_xBP;
    12173             idxGstRegIndex = X86_GREG_xDI;
    12174             break;
    12175         case 4:
    12176             idxGstRegBase  = X86_GREG_xSI;
    12177             idxGstRegIndex = UINT8_MAX;
    12178             break;
    12179         case 5:
    12180             idxGstRegBase  = X86_GREG_xDI;
    12181             idxGstRegIndex = UINT8_MAX;
    12182             break;
    12183         case 6:
    12184             idxGstRegBase  = X86_GREG_xBP;
    12185             idxGstRegIndex = UINT8_MAX;
    12186             break;
    12187 #ifdef _MSC_VER  /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
    12188         default:
    12189 #endif
    12190         case 7:
    12191             idxGstRegBase  = X86_GREG_xBX;
    12192             idxGstRegIndex = UINT8_MAX;
    12193             break;
    12194     }
    12195 
    12196     /*
    12197      * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
    12198      */
    12199     uint8_t const idxRegRet   = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    12200     uint8_t const idxRegBase  = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
    12201                                                                kIemNativeGstRegUse_ReadOnly);
    12202     uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
    12203                               ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
    12204                                                                kIemNativeGstRegUse_ReadOnly)
    12205                               : UINT8_MAX;
    12206 #ifdef RT_ARCH_AMD64
    12207     if (idxRegIndex == UINT8_MAX)
    12208     {
    12209         if (u16EffAddr == 0)
    12210         {
    12211             /* movzx ret, base */
    12212             off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
    12213         }
    12214         else
    12215         {
    12216             /* lea ret32, [base64 + disp32] */
    12217             Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
    12218             uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    12219             if (idxRegRet >= 8 || idxRegBase >= 8)
    12220                 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
    12221             pbCodeBuf[off++] = 0x8d;
    12222             if (idxRegBase != X86_GREG_x12 /*SIB*/)
    12223                 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
    12224             else
    12225             {
    12226                 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
    12227                 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
    12228             }
    12229             pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
    12230             pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
    12231             pbCodeBuf[off++] = 0;
    12232             pbCodeBuf[off++] = 0;
    12233             IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    12234 
    12235             off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
    12236         }
    12237     }
    12238     else
    12239     {
    12240         /* lea ret32, [index64 + base64 (+ disp32)] */
    12241         Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
    12242         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    12243         if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
    12244             pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    12245                              | (idxRegBase  >= 8 ? X86_OP_REX_B : 0)
    12246                              | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
    12247         pbCodeBuf[off++] = 0x8d;
    12248         uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
    12249         pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    12250         pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
    12251         if (bMod == X86_MOD_MEM4)
    12252         {
    12253             pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
    12254             pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
    12255             pbCodeBuf[off++] = 0;
    12256             pbCodeBuf[off++] = 0;
    12257         }
    12258         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    12259         off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
    12260     }
    12261 
    12262 #elif defined(RT_ARCH_ARM64)
    12263     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    12264     if (u16EffAddr == 0)
    12265     {
    12266         if (idxRegIndex == UINT8_MAX)
    12267             pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
    12268         else
    12269         {
    12270             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
    12271             pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
    12272         }
    12273     }
    12274     else
    12275     {
    12276         if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
    12277             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
    12278         else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
    12279             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
    12280                                                              (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
    12281         else
    12282         {
    12283             pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
    12284             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
    12285         }
    12286         if (idxRegIndex != UINT8_MAX)
    12287             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
    12288         pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
    12289     }
    12290 
    12291 #else
    12292 # error "port me"
    12293 #endif
    12294 
    12295     if (idxRegIndex != UINT8_MAX)
    12296         iemNativeRegFreeTmp(pReNative, idxRegIndex);
    12297     iemNativeRegFreeTmp(pReNative, idxRegBase);
    12298     iemNativeVarRegisterRelease(pReNative, idxVarRet);
    12299     return off;
    12300 }
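As a cross-check on the emitted code, the 16-bit effective address for the register forms follows the classic ModR/M table; a compact reference sketch (hypothetical helper with register values passed in directly; the mod=0, rm=6 disp16-only form is handled separately above):

    /* Sketch: the wrapped 16-bit effective address the code above computes. */
    static uint16_t iemSketchEffAddr16(uint8_t bRm, uint16_t u16Disp,
                                       uint16_t uBx, uint16_t uBp, uint16_t uSi, uint16_t uDi)
    {
        uint16_t uEffAddr;
        switch (bRm & X86_MODRM_RM_MASK)
        {
            case 0:  uEffAddr = uBx + uSi; break;
            case 1:  uEffAddr = uBx + uDi; break;
            case 2:  uEffAddr = uBp + uSi; break;
            case 3:  uEffAddr = uBp + uDi; break;
            case 4:  uEffAddr = uSi;       break;
            case 5:  uEffAddr = uDi;       break;
            case 6:  uEffAddr = uBp;       break;
            default: uEffAddr = uBx;       break;
        }
        switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
        {
            case 1:  uEffAddr += (uint16_t)(int16_t)(int8_t)u16Disp; break; /* sign-extended disp8 */
            case 2:  uEffAddr += u16Disp; break;                            /* disp16 */
            default: break;
        }
        return uEffAddr; /* uint16_t arithmetic gives the architectural wrap-around */
    }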
    12301 
    12302 
    12303 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
    12304     off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
    12305 
    12306 /** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
    12307  * @see iemOpHlpCalcRmEffAddrThreadedAddr32  */
    12308 DECL_INLINE_THROW(uint32_t)
    12309 iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    12310                                          uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
    12311 {
    12312     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
    12313 
    12314     /*
    12315      * Handle the disp32 form with no registers first.
    12316      *
    12317      * Convert to an immediate value, as that'll delay the register allocation
    12318      * and assignment till the memory access / call / whatever and we can use
    12319      * a more appropriate register (or none at all).
    12320      */
    12321     if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
    12322     {
    12323         iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
    12324         return off;
    12325     }
    12326 
    12327     /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
    12328     uint32_t u32EffAddr = 0;
    12329     switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    12330     {
    12331         case 0: break;
    12332         case 1: u32EffAddr = (int8_t)u32Disp; break;
    12333         case 2: u32EffAddr = u32Disp; break;
    12334         default: AssertFailed();
    12335     }
    12336 
    12337     /* Get the register (or SIB) value. */
    12338     uint8_t idxGstRegBase  = UINT8_MAX;
    12339     uint8_t idxGstRegIndex = UINT8_MAX;
    12340     uint8_t cShiftIndex    = 0;
    12341     switch (bRm & X86_MODRM_RM_MASK)
    12342     {
    12343         case 0: idxGstRegBase = X86_GREG_xAX; break;
    12344         case 1: idxGstRegBase = X86_GREG_xCX; break;
    12345         case 2: idxGstRegBase = X86_GREG_xDX; break;
    12346         case 3: idxGstRegBase = X86_GREG_xBX; break;
    12347         case 4: /* SIB */
    12348         {
    12349             /* index w/ scaling. */
    12350             cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
    12351             switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
    12352             {
    12353                 case 0: idxGstRegIndex = X86_GREG_xAX; break;
    12354                 case 1: idxGstRegIndex = X86_GREG_xCX; break;
    12355                 case 2: idxGstRegIndex = X86_GREG_xDX; break;
    12356                 case 3: idxGstRegIndex = X86_GREG_xBX; break;
    12357                 case 4: cShiftIndex    = 0; /*no index*/ break;
    12358                 case 5: idxGstRegIndex = X86_GREG_xBP; break;
    12359                 case 6: idxGstRegIndex = X86_GREG_xSI; break;
    12360                 case 7: idxGstRegIndex = X86_GREG_xDI; break;
    12361             }
    12362 
    12363             /* base */
    12364             switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
    12365             {
    12366                 case 0: idxGstRegBase = X86_GREG_xAX; break;
    12367                 case 1: idxGstRegBase = X86_GREG_xCX; break;
    12368                 case 2: idxGstRegBase = X86_GREG_xDX; break;
    12369                 case 3: idxGstRegBase = X86_GREG_xBX; break;
    12370                 case 4:
    12371                     idxGstRegBase     = X86_GREG_xSP;
    12372                     u32EffAddr       += uSibAndRspOffset >> 8;
    12373                     break;
    12374                 case 5:
    12375                     if ((bRm & X86_MODRM_MOD_MASK) != 0)
    12376                         idxGstRegBase = X86_GREG_xBP;
    12377                     else
    12378                     {
    12379                         Assert(u32EffAddr == 0);
    12380                         u32EffAddr    = u32Disp;
    12381                     }
    12382                     break;
    12383                 case 6: idxGstRegBase = X86_GREG_xSI; break;
    12384                 case 7: idxGstRegBase = X86_GREG_xDI; break;
    12385             }
    12386             break;
    12387         }
    12388         case 5: idxGstRegBase = X86_GREG_xBP; break;
    12389         case 6: idxGstRegBase = X86_GREG_xSI; break;
    12390         case 7: idxGstRegBase = X86_GREG_xDI; break;
    12391     }
    12392 
    12393     /*
    12394      * If no registers are involved (SIB.B=5, SIB.X=4), repeat what we did at
    12395      * the start of the function.
    12396      */
    12397     if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
    12398     {
    12399         iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
    12400         return off;
    12401     }
    12402 
    12403     /*
    12404      * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
    12405      */
    12406     uint8_t const idxRegRet   = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    12407     uint8_t       idxRegBase  = idxGstRegBase == UINT8_MAX ? UINT8_MAX
    12408                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
    12409                                                                 kIemNativeGstRegUse_ReadOnly);
    12410     uint8_t       idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
    12411                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
    12412                                                                kIemNativeGstRegUse_ReadOnly);
    12413 
    12414     /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
    12415     if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
    12416     {
    12417         idxRegBase  = idxRegIndex;
    12418         idxRegIndex = UINT8_MAX;
    12419     }
    12420 
    12421 #ifdef RT_ARCH_AMD64
    12422     if (idxRegIndex == UINT8_MAX)
    12423     {
    12424         if (u32EffAddr == 0)
    12425         {
    12426             /* mov ret, base */
    12427             off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
    12428         }
    12429         else
    12430         {
    12431             /* lea ret32, [base64 + disp32] */
    12432             Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
    12433             uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    12434             if (idxRegRet >= 8 || idxRegBase >= 8)
    12435                 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
    12436             pbCodeBuf[off++] = 0x8d;
    12437             uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
    12438             if (idxRegBase != X86_GREG_x12 /*SIB*/)
    12439                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
    12440             else
    12441             {
    12442                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    12443                 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
    12444             }
    12445             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    12446             if (bMod == X86_MOD_MEM4)
    12447             {
    12448                 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    12449                 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    12450                 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    12451             }
    12452             IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    12453         }
    12454     }
    12455     else
    12456     {
    12457         Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
    12458         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    12459         if (idxRegBase == UINT8_MAX)
    12460         {
    12461             /* lea ret32, [(index64 << cShiftIndex) + disp32] */
    12462             if (idxRegRet >= 8 || idxRegIndex >= 8)
    12463                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    12464                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
    12465             pbCodeBuf[off++] = 0x8d;
    12466             pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
    12467             pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
    12468             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    12469             pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    12470             pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    12471             pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    12472         }
    12473         else
    12474         {
    12475             /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
    12476             if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
    12477                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    12478                                  | (idxRegBase  >= 8 ? X86_OP_REX_B : 0)
    12479                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
    12480             pbCodeBuf[off++] = 0x8d;
    12481             uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
    12482                                : (int8_t)u32EffAddr == (int32_t)u32EffAddr           ? X86_MOD_MEM1 : X86_MOD_MEM4;
    12483             pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    12484             pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
    12485             if (bMod != X86_MOD_MEM0)
    12486             {
    12487                 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    12488                 if (bMod == X86_MOD_MEM4)
    12489                 {
    12490                     pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    12491                     pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    12492                     pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    12493                 }
    12494             }
    12495         }
    12496         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    12497     }
    12498 
    12499 #elif defined(RT_ARCH_ARM64)
    12500     if (u32EffAddr == 0)
    12501     {
    12502         if (idxRegIndex == UINT8_MAX)
    12503             off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
    12504         else if (idxRegBase == UINT8_MAX)
    12505         {
    12506             if (cShiftIndex == 0)
    12507                 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
    12508             else
    12509             {
    12510                 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    12511                 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
    12512             }
    12513         }
    12514         else
    12515         {
    12516             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    12517             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
    12518                                                           false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
    12519         }
    12520     }
    12521     else
    12522     {
    12523         if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
    12524         {
    12525             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    12526             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
    12527         }
    12528         else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
    12529         {
    12530             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    12531             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
    12532                                                              (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
    12533         }
    12534         else
    12535         {
    12536             off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
    12537             if (idxRegBase != UINT8_MAX)
    12538             {
    12539                 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    12540                 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
    12541             }
    12542         }
    12543         if (idxRegIndex != UINT8_MAX)
    12544         {
    12545             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    12546             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
    12547                                                           false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
    12548         }
    12549     }
    12550 
    12551 #else
    12552 # error "port me"
    12553 #endif
    12554 
    12555     if (idxRegIndex != UINT8_MAX)
    12556         iemNativeRegFreeTmp(pReNative, idxRegIndex);
    12557     if (idxRegBase != UINT8_MAX)
    12558         iemNativeRegFreeTmp(pReNative, idxRegBase);
    12559     iemNativeVarRegisterRelease(pReNative, idxVarRet);
    12560     return off;
    12561 }
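Both architecture paths above reduce to the same arithmetic once the SIB pieces are decoded; a one-line reference sketch (hypothetical helper; the pop [xSP] RSP-offset adjustment is already folded into the displacement by the decoding above):

    /* Sketch: EA32 = disp + base + (index << scale), wrapped to 32 bits. */
    static uint32_t iemSketchEffAddr32(uint32_t uDisp, uint32_t uBase, uint32_t uIndex, uint8_t cShiftIndex)
    {
        return uDisp + uBase + (uIndex << cShiftIndex); /* uint32_t arithmetic gives the wrap */
    }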
    12562 
    12563 
    12564 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
    12565     off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
    12566                                                    a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
    12567 
    12568 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
    12569     off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
    12570                                                    a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
    12571 
    12572 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
    12573     off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
    12574                                                    a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
    12575 
    12576 /**
    12577  * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
    12578  *
    12579  * @returns New off.
    12580  * @param   pReNative           The native recompile state.
    12581  * @param   off                 The current code buffer offset.
    12582  * @param   bRmEx               The ModRM byte but with bit 3 set to REX.B and
    12583  *                              bit 4 to REX.X.  The two bits are part of the
    12584  *                              REG sub-field, which isn't needed in this
    12585  *                              function.
    12586  * @param   uSibAndRspOffset    Two parts:
    12587  *                                - The first 8 bits make up the SIB byte.
    12588  *                                - The next 8 bits are the fixed RSP/ESP offset
    12589  *                                  in case of a pop [xSP].
    12590  * @param   u32Disp             The displacement byte/word/dword, if any.
    12591  * @param   cbInstr             The size of the fully decoded instruction. Used
    12592  *                              for RIP relative addressing.
    12593  * @param   idxVarRet           The result variable number.
    12594  * @param   f64Bit              Whether to use a 64-bit or 32-bit address size
    12595  *                              when calculating the address.
    12596  *
    12597  * @see iemOpHlpCalcRmEffAddrThreadedAddr64
    12598  */
    12599 DECL_INLINE_THROW(uint32_t)
    12600 iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
    12601                                          uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
    12602 {
    12603     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
    12604 
    12605     /*
    12606      * Special case the rip + disp32 form first.
    12607      */
    12608     if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
    12609     {
    12610 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    12611         /* Need to take the current PC offset into account for the displacement; no need to flush here,
    12612          * as the PC is only read and there is no branching or calling of helpers involved. */
    12613         u32Disp += pReNative->Core.offPc;
    12614 #endif
    12615 
    12616         uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    12617         uint8_t const idxRegPc  = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
    12618                                                                   kIemNativeGstRegUse_ReadOnly);
    12619 #ifdef RT_ARCH_AMD64
    12620         if (f64Bit)
    12621         {
    12622             int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
    12623             if ((int32_t)offFinalDisp == offFinalDisp)
    12624                 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
    12625             else
    12626             {
    12627                 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
    12628                 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
    12629             }
    12630         }
    12631         else
    12632             off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
    12633 
    12634 #elif defined(RT_ARCH_ARM64)
    12635         if (f64Bit)
    12636             off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
    12637                                                                  (int64_t)(int32_t)u32Disp + cbInstr);
    12638         else
    12639             off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
    12640                                                                    (int32_t)u32Disp + cbInstr);
    12641 
    12642 #else
    12643 # error "Port me!"
    12644 #endif
    12645         iemNativeRegFreeTmp(pReNative, idxRegPc);
    12646         iemNativeVarRegisterRelease(pReNative, idxVarRet);
    12647         return off;
    12648     }
    12649 
    12650     /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 below). */
    12651     int64_t i64EffAddr = 0;
    12652     switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    12653     {
    12654         case 0: break;
    12655         case 1: i64EffAddr = (int8_t)u32Disp; break;
    12656         case 2: i64EffAddr = (int32_t)u32Disp; break;
    12657         default: AssertFailed();
    12658     }
    12659 
    12660     /* Get the register (or SIB) value. */
    12661     uint8_t idxGstRegBase  = UINT8_MAX;
    12662     uint8_t idxGstRegIndex = UINT8_MAX;
    12663     uint8_t cShiftIndex    = 0;
    12664     if ((bRmEx & X86_MODRM_RM_MASK) != 4)
    12665         idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
    12666     else /* SIB: */
    12667     {
    12668         /* index w/ scaling. */
    12669         cShiftIndex    = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
    12670         idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
    12671                        | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
    12672         if (idxGstRegIndex == 4)
    12673         {
    12674             /* no index */
    12675             cShiftIndex    = 0;
    12676             idxGstRegIndex = UINT8_MAX;
    12677         }
    12678 
    12679         /* base */
    12680         idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
    12681         if (idxGstRegBase == 4)
    12682         {
    12683             /* pop [rsp] hack */
    12684             i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
    12685         }
    12686         else if (   (idxGstRegBase & X86_SIB_BASE_MASK) == 5
    12687                  && (bRmEx & X86_MODRM_MOD_MASK) == 0)
    12688         {
    12689             /* mod=0 and base=5 -> disp32, no base reg. */
    12690             Assert(i64EffAddr == 0);
    12691             i64EffAddr    = (int32_t)u32Disp;
    12692             idxGstRegBase = UINT8_MAX;
    12693         }
    12694     }
    12695 
    12696     /*
    12697      * If no registers are involved (SIB.B=5, SIB.X=4), the effective address is just
    12698      * the fixed displacement, so set the result variable to that constant.
    12699      */
    12700     if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
    12701     {
    12702         if (f64Bit)
    12703             iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
    12704         else
    12705             iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
    12706         return off;
    12707     }
    12708 
    12709     /*
    12710      * Now emit code that calculates:
    12711      *      idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
    12712      * or if !f64Bit:
    12713      *      idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
    12714      */
    12715     uint8_t const idxRegRet   = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    12716     uint8_t       idxRegBase  = idxGstRegBase == UINT8_MAX ? UINT8_MAX
    12717                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
    12718                                                                 kIemNativeGstRegUse_ReadOnly);
    12719     uint8_t       idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
    12720                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
    12721                                                                kIemNativeGstRegUse_ReadOnly);
    12722 
    12723     /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
    12724     if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
    12725     {
    12726         idxRegBase  = idxRegIndex;
    12727         idxRegIndex = UINT8_MAX;
    12728     }
    12729 
    12730 #ifdef RT_ARCH_AMD64
    12731     uint8_t bFinalAdj;
    12732     if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
    12733         bFinalAdj = 0; /* likely */
    12734     else
    12735     {
    12736         /* pop [rsp] with a problematic disp32 value.  Split out the
    12737            RSP offset and add it separately afterwards (bFinalAdj). */
    12738         /** @todo testcase: pop [rsp] with problematic disp32 (mod4).   */
    12739         Assert(idxGstRegBase == X86_GREG_xSP);
    12740         Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
    12741         bFinalAdj   = (uint8_t)(uSibAndRspOffset >> 8);
    12742         Assert(bFinalAdj != 0);
    12743         i64EffAddr -= bFinalAdj;
    12744         Assert((int32_t)i64EffAddr == i64EffAddr);
    12745     }
    12746     uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
    12747 //pReNative->pInstrBuf[off++] = 0xcc;
    12748 
    12749     if (idxRegIndex == UINT8_MAX)
    12750     {
    12751         if (u32EffAddr == 0)
    12752         {
    12753             /* mov ret, base */
    12754             if (f64Bit)
    12755                 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
    12756             else
    12757                 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
    12758         }
    12759         else
    12760         {
    12761             /* lea ret, [base + disp32] */
    12762             Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
    12763             uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    12764             if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
    12765                 pbCodeBuf[off++] = (idxRegRet  >= 8 ? X86_OP_REX_R : 0)
    12766                                  | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
    12767                                  | (f64Bit          ? X86_OP_REX_W : 0);
    12768             pbCodeBuf[off++] = 0x8d;
    12769             uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
    12770             if (idxRegBase != X86_GREG_x12 /*SIB*/)
    12771                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
    12772             else
    12773             {
    12774                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    12775                 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
    12776             }
    12777             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    12778             if (bMod == X86_MOD_MEM4)
    12779             {
    12780                 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    12781                 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    12782                 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    12783             }
    12784             IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    12785         }
    12786     }
    12787     else
    12788     {
    12789         Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
    12790         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    12791         if (idxRegBase == UINT8_MAX)
    12792         {
    12793             /* lea ret, [(index64 << cShiftIndex) + disp32] */
    12794             if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
    12795                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    12796                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
    12797                                  | (f64Bit           ? X86_OP_REX_W : 0);
    12798             pbCodeBuf[off++] = 0x8d;
    12799             pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
    12800             pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
    12801             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    12802             pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    12803             pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    12804             pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    12805         }
    12806         else
    12807         {
    12808             /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
    12809             if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
    12810                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    12811                                  | (idxRegBase  >= 8 ? X86_OP_REX_B : 0)
    12812                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
    12813                                  | (f64Bit           ? X86_OP_REX_W : 0);
    12814             pbCodeBuf[off++] = 0x8d;
    12815             uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
    12816                                : (int8_t)u32EffAddr == (int32_t)u32EffAddr           ? X86_MOD_MEM1 : X86_MOD_MEM4;
    12817             pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    12818             pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
    12819             if (bMod != X86_MOD_MEM0)
    12820             {
    12821                 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    12822                 if (bMod == X86_MOD_MEM4)
    12823                 {
    12824                     pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    12825                     pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    12826                     pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    12827                 }
    12828             }
    12829         }
    12830         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    12831     }
    12832 
    12833     if (!bFinalAdj)
    12834     { /* likely */ }
    12835     else
    12836     {
    12837         Assert(f64Bit);
    12838         off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
    12839     }
    12840 
    12841 #elif defined(RT_ARCH_ARM64)
    12842     if (i64EffAddr == 0)
    12843     {
    12844         uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    12845         if (idxRegIndex == UINT8_MAX)
    12846             pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
    12847         else if (idxRegBase != UINT8_MAX)
    12848             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
    12849                                                           f64Bit, false /*fSetFlags*/, cShiftIndex);
    12850         else
    12851         {
    12852             Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
    12853             pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
    12854         }
    12855     }
    12856     else
    12857     {
    12858         if (f64Bit)
    12859         { /* likely */ }
    12860         else
    12861             i64EffAddr = (int32_t)i64EffAddr;
    12862 
    12863         if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
    12864         {
    12865             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    12866             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
    12867         }
    12868         else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
    12869         {
    12870             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    12871             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
    12872         }
    12873         else
    12874         {
    12875             if (f64Bit)
    12876                 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
    12877             else
    12878                 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
    12879             if (idxRegBase != UINT8_MAX)
    12880             {
    12881                 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    12882                 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
    12883             }
    12884         }
    12885         if (idxRegIndex != UINT8_MAX)
    12886         {
    12887             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    12888             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
    12889                                                           f64Bit, false /*fSetFlags*/, cShiftIndex);
    12890         }
    12891     }
    12892 
    12893 #else
    12894 # error "port me"
    12895 #endif
    12896 
    12897     if (idxRegIndex != UINT8_MAX)
    12898         iemNativeRegFreeTmp(pReNative, idxRegIndex);
    12899     if (idxRegBase != UINT8_MAX)
    12900         iemNativeRegFreeTmp(pReNative, idxRegBase);
    12901     iemNativeVarRegisterRelease(pReNative, idxVarRet);
    12902     return off;
    12903 }
    12904 
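For orientation, here is a minimal sketch of how a caller would pack the two threaded
parameters consumed above for an operand like [r8 + r9*4 + disp32]. The helper name is
made up for the illustration; the X86_MODRM_*/X86_SIB_* macros are the iprt/x86.h ones
used in the emitter above.

    #include <iprt/x86.h>   /* X86_MODRM_MAKE, X86_SIB_MAKE, X86_MOD_MEM4 */

    /* Illustration only: pack bRmEx/uSibAndRspOffset for [r8 + r9*4 + disp32]. */
    static void iemExamplePackRmAndSib(uint8_t *pbRmEx, uint32_t *puSibAndRspOffset)
    {
        /* ModRM: mod=2 (disp32 follows), rm=4 (SIB follows).  The reg field is unused
           by the emitter, so bits 3 and 4 carry REX.B and REX.X as documented above. */
        *pbRmEx = X86_MODRM_MAKE(X86_MOD_MEM4, 0 /*reg*/, 4 /*rm: SIB*/)
                | 0x8   /* REX.B: base register is r8  (0 + 8) */
                | 0x10; /* REX.X: index register is r9 (1 + 8) */
        /* SIB: base=0 (rAX/r8), index=1 (rCX/r9), scale=2 (index << 2, i.e. *4).
           Bits 15:8 stay zero; they only carry the RSP offset for a pop [xSP]. */
        *puSibAndRspOffset = X86_SIB_MAKE(0 /*base*/, 1 /*index*/, 2 /*scale*/);
    }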
    12961 /*********************************************************************************************************************************
    12962 *   Memory fetches and stores common                                                                                             *
    12963 *********************************************************************************************************************************/
    12964 
    12965 typedef enum IEMNATIVEMITMEMOP
    12966 {
    12967     kIemNativeEmitMemOp_Store = 0,
    12968     kIemNativeEmitMemOp_Fetch,
    12969     kIemNativeEmitMemOp_Fetch_Zx_U16,
    12970     kIemNativeEmitMemOp_Fetch_Zx_U32,
    12971     kIemNativeEmitMemOp_Fetch_Zx_U64,
    12972     kIemNativeEmitMemOp_Fetch_Sx_U16,
    12973     kIemNativeEmitMemOp_Fetch_Sx_U32,
    12974     kIemNativeEmitMemOp_Fetch_Sx_U64
    12975 } IEMNATIVEMITMEMOP;
    12976 
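As a note on the enum values: the Zx_/Sx_ variants select zero vs. sign extension of the
loaded value into the wider destination register. A plain C sketch of the intended
semantics (not the emitted host code), fetching the byte 0x80 as a 32-bit value:

    uint8_t  const bValue = 0x80;
    uint32_t const uZx    = bValue;                            /* Fetch_Zx_U32: 0x00000080 */
    uint32_t const uSx    = (uint32_t)(int32_t)(int8_t)bValue; /* Fetch_Sx_U32: 0xffffff80 */
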
    12977 /** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
    12978  * as well as IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
    12979  * (the flat variants pass iSegReg = UINT8_MAX). */
    12980 DECL_INLINE_THROW(uint32_t)
    12981 iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off,  uint8_t idxVarValue, uint8_t iSegReg,
    12982                                      uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
    12983                                      uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
    12984 {
    12985     /*
    12986      * Assert sanity.
    12987      */
    12988     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
    12989     PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
    12990     Assert(   enmOp != kIemNativeEmitMemOp_Store
    12991            || pVarValue->enmKind == kIemNativeVarKind_Immediate
    12992            || pVarValue->enmKind == kIemNativeVarKind_Stack);
    12993     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
    12994     PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
    12995     AssertStmt(   pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
    12996                || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
    12997                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    12998     Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
    12999     Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
    13000     AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
    13001 #ifdef VBOX_STRICT
    13002     if (iSegReg == UINT8_MAX)
    13003     {
    13004         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    13005                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    13006                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    13007         switch (cbMem)
    13008         {
    13009             case 1:
    13010                 Assert(   pfnFunction
    13011                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
    13012                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    13013                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    13014                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    13015                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    13016                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
    13017                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
    13018                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
    13019                            : UINT64_C(0xc000b000a0009000) ));
    13020                 break;
    13021             case 2:
    13022                 Assert(   pfnFunction
    13023                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
    13024                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
    13025                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
    13026                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
    13027                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
    13028                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
    13029                            : UINT64_C(0xc000b000a0009000) ));
    13030                 break;
    13031             case 4:
    13032                 Assert(   pfnFunction
    13033                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
    13034                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
    13035                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
    13036                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
    13037                            : UINT64_C(0xc000b000a0009000) ));
    13038                 break;
    13039             case 8:
    13040                 Assert(    pfnFunction
    13041                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
    13042                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
    13043                            : UINT64_C(0xc000b000a0009000) ));
    13044                 break;
    13045         }
    13046     }
    13047     else
    13048     {
    13049         Assert(iSegReg < 6);
    13050         switch (cbMem)
    13051         {
    13052             case 1:
    13053                 Assert(   pfnFunction
    13054                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU8
    13055                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU8
    13056                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
    13057                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
    13058                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
    13059                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
    13060                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
    13061                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
    13062                            : UINT64_C(0xc000b000a0009000) ));
    13063                 break;
    13064             case 2:
    13065                 Assert(   pfnFunction
    13066                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU16
    13067                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU16
    13068                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
    13069                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
    13070                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
    13071                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
    13072                            : UINT64_C(0xc000b000a0009000) ));
    13073                 break;
    13074             case 4:
    13075                 Assert(   pfnFunction
    13076                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU32
    13077                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU32
    13078                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
    13079                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
    13080                            : UINT64_C(0xc000b000a0009000) ));
    13081                 break;
    13082             case 8:
    13083                 Assert(    pfnFunction
    13084                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU64
    13085                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU64
    13086                            : UINT64_C(0xc000b000a0009000) ));
    13087                 break;
    13088         }
    13089     }
    13090 #endif
    13091 
    13092 #ifdef VBOX_STRICT
    13093     /*
    13094      * Check that the fExec flags we've got make sense.
    13095      */
    13096     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    13097 #endif
    13098 
    13099     /*
    13100      * To keep things simple we have to commit any pending writes first as we
    13101      * may end up making calls.
    13102      */
    13103     /** @todo we could postpone this till we make the call and reload the
    13104      * registers after returning from the call. Not sure if that's sensible or
    13105      * not, though. */
    13106 #ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    13107     off = iemNativeRegFlushPendingWrites(pReNative, off);
    13108 #else
    13109     /* The program counter is treated differently for now. */
    13110     off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
    13111 #endif
    13112 
    13113 #ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    13114     /*
    13115      * Move/spill/flush stuff out of call-volatile registers.
    13116      * This is the easy way out. We could contain this to the tlb-miss branch
    13117      * by saving and restoring active stuff here.
    13118      */
    13119     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
    13120 #endif
    13121 
    13122     /*
    13123      * Define labels and allocate the result register (trying for the return
    13124      * register if we can).
    13125      */
    13126     uint16_t const uTlbSeqNo         = pReNative->uTlbSeqNo++;
    13127     uint8_t  const idxRegValueFetch  = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
    13128                                      : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
    13129                                      ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
    13130                                      : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
    13131     IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
    13132     uint8_t  const idxRegValueStore  =    !TlbState.fSkip
    13133                                        && enmOp == kIemNativeEmitMemOp_Store
    13134                                        && pVarValue->enmKind != kIemNativeVarKind_Immediate
    13135                                      ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
    13136                                      : UINT8_MAX;
    13137     uint8_t  const idxRegMemResult   = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
    13138     uint32_t const idxLabelTlbLookup = !TlbState.fSkip
    13139                                      ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
    13140                                      : UINT32_MAX;
    13141 
    13142     /*
    13143      * Jump to the TLB lookup code.
    13144      */
    13145     if (!TlbState.fSkip)
    13146         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
    13147 
    13148     /*
    13149      * TlbMiss:
    13150      *
    13151      * Call helper to do the fetching.
    13152      * We flush all guest register shadow copies here.
    13153      */
    13154     uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
    13155 
    13156 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    13157     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    13158 #else
    13159     RT_NOREF(idxInstr);
    13160 #endif
    13161 
    13162 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    13163     if (pReNative->Core.offPc)
    13164     {
    13165         /*
    13166          * Update the program counter but restore it at the end of the TlbMiss branch.
    13167          * This should allow delaying more program counter updates for the TlbLookup and hit
    13168          * paths, which are hopefully much more frequent, reducing the number of memory accesses.
    13169          */
    13170         /* Allocate a temporary PC register. */
    13171         uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    13172 
    13173         /* Perform the addition and store the result. */
    13174         off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
    13175         off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    13176 
    13177         /* Free and flush the PC register. */
    13178         iemNativeRegFreeTmp(pReNative, idxPcReg);
    13179         iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
    13180     }
    13181 #endif
    13182 
    13183 #ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    13184     /* Save variables in volatile registers. */
    13185     uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
    13186                                      | (idxRegMemResult  != UINT8_MAX ? RT_BIT_32(idxRegMemResult)  : 0)
    13187                                      | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
    13188     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    13189 #endif
    13190 
    13191     /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
    13192     uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    13193     if (enmOp == kIemNativeEmitMemOp_Store)
    13194     {
    13195         uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
    13196         off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
    13197 #ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    13198                                                         IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    13199 #else
    13200                                                         IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInVolatileRegs*/);
    13201         fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
    13202 #endif
    13203     }
    13204 
    13205     /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
    13206     off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
    13207 #ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    13208                                                     fVolGregMask);
    13209 #else
    13210                                                     fVolGregMask, true /*fSpilledVarsInVolatileRegs*/);
    13211 #endif
    13212 
    13213     if (iSegReg != UINT8_MAX)
    13214     {
    13215         /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
    13216         AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
    13217         off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
    13218     }
    13219 
    13220     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    13221     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    13222 
    13223     /* Done setting up parameters, make the call. */
    13224     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    13225 
    13226     /*
    13227      * Put the result in the right register if this is a fetch.
    13228      */
    13229     if (enmOp != kIemNativeEmitMemOp_Store)
    13230     {
    13231         Assert(idxRegValueFetch == pVarValue->idxReg);
    13232         if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
    13233             off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
    13234     }
    13235 
    13236 #ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    13237     /* Restore variables and guest shadow registers to volatile registers. */
    13238     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
    13239     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
    13240 #endif
    13241 
    13242 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    13243     if (pReNative->Core.offPc)
    13244     {
    13245         /*
    13246          * Time to restore the program counter to its original value.
    13247          */
    13248         /* Allocate a temporary PC register. */
    13249         uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    13250 
    13251         /* Restore the original value. */
    13252         off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
    13253         off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    13254 
    13255         /* Free and flush the PC register. */
    13256         iemNativeRegFreeTmp(pReNative, idxPcReg);
    13257         iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
    13258     }
    13259 #endif
    13260 
    13261 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    13262     if (!TlbState.fSkip)
    13263     {
    13264         /* end of TlbMiss - Jump to the done label. */
    13265         uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    13266         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
    13267 
    13268         /*
    13269          * TlbLookup:
    13270          */
    13271         off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
    13272                                            enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
    13273                                            idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
    13274 
    13275         /*
    13276          * Emit code to do the actual storing / fetching.
    13277          */
    13278         PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
    13279 # ifdef VBOX_WITH_STATISTICS
    13280         off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
    13281                                                   enmOp == kIemNativeEmitMemOp_Store
    13282                                                   ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
    13283                                                   : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
    13284 # endif
    13285         switch (enmOp)
    13286         {
    13287             case kIemNativeEmitMemOp_Store:
    13288                 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
    13289                 {
    13290                     switch (cbMem)
    13291                     {
    13292                         case 1:
    13293                             off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
    13294                             break;
    13295                         case 2:
    13296                             off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
    13297                             break;
    13298                         case 4:
    13299                             off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
    13300                             break;
    13301                         case 8:
    13302                             off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
    13303                             break;
    13304                         default:
    13305                             AssertFailed();
    13306                     }
    13307                 }
    13308                 else
    13309                 {
    13310                     switch (cbMem)
    13311                     {
    13312                         case 1:
    13313                             off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
    13314                                                                 idxRegMemResult, TlbState.idxReg1);
    13315                             break;
    13316                         case 2:
    13317                             off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
    13318                                                                  idxRegMemResult, TlbState.idxReg1);
    13319                             break;
    13320                         case 4:
    13321                             off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
    13322                                                                  idxRegMemResult, TlbState.idxReg1);
    13323                             break;
    13324                         case 8:
    13325                             off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
    13326                                                                  idxRegMemResult, TlbState.idxReg1);
    13327                             break;
    13328                         default:
    13329                             AssertFailed();
    13330                     }
    13331                 }
    13332                 break;
    13333 
    13334             case kIemNativeEmitMemOp_Fetch:
    13335             case kIemNativeEmitMemOp_Fetch_Zx_U16:
    13336             case kIemNativeEmitMemOp_Fetch_Zx_U32:
    13337             case kIemNativeEmitMemOp_Fetch_Zx_U64:
    13338                 switch (cbMem)
    13339                 {
    13340                     case 1:
    13341                         off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    13342                         break;
    13343                     case 2:
    13344                         off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    13345                         break;
    13346                     case 4:
    13347                         off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    13348                         break;
    13349                     case 8:
    13350                         off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    13351                         break;
    13352                     default:
    13353                         AssertFailed();
    13354                 }
    13355                 break;
    13356 
    13357             case kIemNativeEmitMemOp_Fetch_Sx_U16:
    13358                 Assert(cbMem == 1);
    13359                 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    13360                 break;
    13361 
    13362             case kIemNativeEmitMemOp_Fetch_Sx_U32:
    13363                 Assert(cbMem == 1 || cbMem == 2);
    13364                 if (cbMem == 1)
    13365                     off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    13366                 else
    13367                     off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    13368                 break;
    13369 
    13370             case kIemNativeEmitMemOp_Fetch_Sx_U64:
    13371                 switch (cbMem)
    13372                 {
    13373                     case 1:
    13374                         off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    13375                         break;
    13376                     case 2:
    13377                         off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    13378                         break;
    13379                     case 4:
    13380                         off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    13381                         break;
    13382                     default:
    13383                         AssertFailed();
    13384                 }
    13385                 break;
    13386 
    13387             default:
    13388                 AssertFailed();
    13389         }
    13390 
    13391         iemNativeRegFreeTmp(pReNative, idxRegMemResult);
    13392 
    13393         /*
    13394          * TlbDone:
    13395          */
    13396         iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    13397 
    13398         TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
    13399 
    13400 # ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    13401         /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
    13402         iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    13403 # endif
    13404     }
    13405 #else
    13406     RT_NOREF(fAlignMask, idxLabelTlbMiss);
    13407 #endif
    13408 
    13409     if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
    13410         iemNativeVarRegisterRelease(pReNative, idxVarValue);
    13411     return off;
    13412 }
    13413 
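For reference, the code the worker above lays down when the TLB lookup isn't skipped has
roughly this shape (a sketch reconstructed from the label and jump emits; details vary
with the build options):

    /*
     *          jmp     TlbLookup               ; emitted first, unless TlbState.fSkip
     *      TlbMiss:
     *          ; save volatiles, load pVCpu/GCPtrMem/value arguments, call pfnFunction,
     *          ; move a fetch result into its register, restore shadows
     *          ; (with IEMNATIVE_WITH_DELAYED_PC_UPDATING: write back rip, restore after)
     *          jmp     TlbDone
     *      TlbLookup:
     *          ; inline TLB probe - a miss jumps back to TlbMiss, a hit falls through
     *          ; to the inline load/store emitted just above the TlbDone label
     *      TlbDone:
     */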
    13414 
    13415 
    13416 /*********************************************************************************************************************************
    13417 *   Memory fetches (IEM_MEM_FETCH_XXX).                                                                                          *
    13418 *********************************************************************************************************************************/
    13419 
    13420 /* 8-bit segmented: */
    13421 #define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
    13422     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
    13423                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
    13424                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
    13425 
    13426 #define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
    13427     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    13428                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
    13429                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
    13430 
    13431 #define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    13432     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    13433                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    13434                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
    13435 
    13436 #define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    13437     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    13438                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    13439                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
    13440 
    13441 #define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
    13442     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    13443                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
    13444                                                (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
    13445 
    13446 #define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    13447     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    13448                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    13449                                                (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
    13450 
    13451 #define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    13452     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    13453                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    13454                                                (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
    13455 
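Each of these macros is a thin wrapper expanding to a single call of the common worker
above; e.g. IEM_MC_FETCH_MEM_U8_SX_U64 with a (hypothetical) DS-relative operand becomes:

    /* Expansion sketch of IEM_MC_FETCH_MEM_U8_SX_U64(u64Dst, X86_SREG_DS, GCPtrMem): */
    off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, u64Dst, X86_SREG_DS, GCPtrMem,
                                               sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64,
                                               (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr);
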
    13456 /* 16-bit segmented: */
    13457 #define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
    13458     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    13459                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    13460                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
    13461 
    13462 #define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
    13463     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    13464                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    13465                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
    13466 
    13467 #define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    13468     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    13469                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    13470                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
    13471 
    13472 #define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    13473     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    13474                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    13475                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
    13476 
    13477 #define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    13478     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    13479                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    13480                                                (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
    13481 
    13482 #define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    13483     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    13484                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    13485                                                (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
    13486 
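A note on the fAlignMask arguments: sizeof(T) - 1 is the natural-alignment mask for the
access size, which the TLB-lookup path tests GCPtrMem against (a sketch of the check; the
actual emit lives in iemNativeEmitTlbLookup):

    /* A 4-byte access is naturally aligned iff the low two address bits are clear. */
    bool const fAligned = (GCPtrMem & (sizeof(uint32_t) - 1 /* = 3 = fAlignMask */)) == 0;
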
    13487 
    13488 /* 32-bit segmented: */
    13489 #define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    13490     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    13491                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    13492                                                (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
    13493 
    13494 #define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
    13495     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    13496                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    13497                                                (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
    13498 
    13499 #define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    13500     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    13501                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    13502                                                (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
    13503 
    13504 #define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    13505     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    13506                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    13507                                                (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
    13508 
    13509 
    13510 /* 64-bit segmented: */
    13511 #define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    13512     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    13513                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
    13514                                                (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
    13515 
    13516 
    13517 
    13518 /* 8-bit flat: */
    13519 #define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
    13520     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
    13521                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
    13522                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    13523 
    13524 #define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
    13525     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    13526                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
    13527                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    13528 
    13529 #define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
    13530     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    13531                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    13532                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    13533 
    13534 #define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
    13535     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    13536                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    13537                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    13538 
    13539 #define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
    13540     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    13541                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
    13542                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
    13543 
    13544 #define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
    13545     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    13546                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    13547                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
    13548 
    13549 #define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
    13550     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    13551                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    13552                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
    13553 
    13554 
    13555 /* 16-bit flat: */
    13556 #define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
    13557     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    13558                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    13559                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
    13560 
    13561 #define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
    13562     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    13563                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    13564                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
    13565 
    13566 #define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
    13567     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    13568                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    13569                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
    13570 
    13571 #define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
    13572     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    13573                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    13574                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
    13575 
    13576 #define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
    13577     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    13578                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    13579                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
    13580 
    13581 #define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
    13582     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    13583                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    13584                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
    13585 
    13586 /* 32-bit flat: */
    13587 #define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
    13588     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    13589                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    13590                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
    13591 
    13592 #define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
    13593     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    13594                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    13595                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
    13596 
    13597 #define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
    13598     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    13599                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    13600                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
    13601 
    13602 #define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
    13603     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    13604                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    13605                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
    13606 
    13607 /* 64-bit flat: */
    13608 #define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
    13609     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    13610                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
    13611                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
    13612 
    13613 
    13614 
    13615 /*********************************************************************************************************************************
    13616 *   Memory stores (IEM_MEM_STORE_XXX).                                                                                           *
    13617 *********************************************************************************************************************************/
    13618 
    13619 #define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
    13620     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
    13621                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
    13622                                                (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
    13623 
    13624 #define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
    13625     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
    13626                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
    13627                                                (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
    13628 
    13629 #define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
    13630     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
    13631                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
    13632                                                (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
    13633 
    13634 #define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
    13635     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
    13636                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
    13637                                                (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
    13638 
    13639 
    13640 #define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
    13641     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
    13642                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
    13643                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
    13644 
    13645 #define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
    13646     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
    13647                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
    13648                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
    13649 
    13650 #define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
    13651     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
    13652                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
    13653                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
    13654 
    13655 #define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
    13656     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
    13657                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
    13658                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
    13659 
    13660 
    13661 #define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
    13662     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    13663                                                (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
    13664 
    13665 #define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
    13666     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    13667                                                (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
    13668 
    13669 #define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
    13670     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    13671                                                (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
    13672 
    13673 #define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
    13674     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    13675                                                (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
    13676 
    13677 
    13678 #define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
    13679     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    13680                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
    13681 
    13682 #define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
    13683     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    13684                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
    13685 
    13686 #define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
    13687     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    13688                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
    13689 
    13690 #define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
    13691     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    13692                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
    13693 
    13694 /** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
    13695  *  IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
    13696 DECL_INLINE_THROW(uint32_t)
    13697 iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
    13698                                     uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
    13699 {
    13700     /*
    13701      * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
    13702      * to do the grunt work.
    13703      */
    13704     uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
    13705     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
    13706                                                cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
    13707                                                pfnFunction, idxInstr);
    13708     iemNativeVarFreeLocal(pReNative, idxVarConstValue);
    13709     return off;
    13710 }
    13711 
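/* Illustrative sketch (not part of the changeset): what one of the const-store
   wrappers above boils down to. The segment and value below are placeholders,
   and a_GCPtrMem is really a variable index, not a raw pointer. */
#if 0
    /* IEM_MC_STORE_MEM_U16_CONST(X86_SREG_DS, a_GCPtrMem, 0x1234) expands to: */
    off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, 0x1234 /*uValueConst*/, X86_SREG_DS /*iSegReg*/,
                                               a_GCPtrMem /*idxVarGCPtrMem*/, sizeof(uint16_t),
                                               (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr);
#endif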
    13712 
    13713 
    13714 /*********************************************************************************************************************************
    13715 *   Stack Accesses.                                                                                                              *
    13716 *********************************************************************************************************************************/
    13717 /*                                                     RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
    13718 #define IEM_MC_PUSH_U16(a_u16Value) \
    13719     off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16,  0, 0, 0), \
    13720                                  (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
    13721 #define IEM_MC_PUSH_U32(a_u32Value) \
    13722     off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32,  0, 0, 0), \
    13723                                  (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
    13724 #define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
    13725     off = iemNativeEmitStackPush(pReNative, off, a_uSegVal,  RT_MAKE_U32_FROM_U8(32,  0, 1, 0), \
    13726                                  (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
    13727 #define IEM_MC_PUSH_U64(a_u64Value) \
    13728     off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64,  0, 0, 0), \
    13729                                  (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
    13730 
    13731 #define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
    13732     off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
    13733                                  (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
    13734 #define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
    13735     off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
    13736                                  (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
    13737 #define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
    13738     off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
    13739                                  (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
    13740 
    13741 #define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
    13742     off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
    13743                                  (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
    13744 #define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
    13745     off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
    13746                                  (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
    13747 
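/* A quick sketch (not part of the changeset) of how the packed cBitsVarAndFlat
   argument documented above decodes, taking IEM_MC_FLAT32_PUSH_U32_SREG as the
   example: */
#if 0
    uint32_t const cBitsVarAndFlat = RT_MAKE_U32_FROM_U8(32, 32, 1, 0);  /* = 0x00012020 */
    uint8_t  const cbMem           = RT_BYTE1(cBitsVarAndFlat) / 8;      /* = 4: 32-bit value */
    uint8_t  const cBitsFlat       = RT_BYTE2(cBitsVarAndFlat);          /* = 32: flat 32-bit stack */
    bool     const fIsSegReg       = RT_BYTE3(cBitsVarAndFlat) != 0;     /* = true: segment register push */
#endif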
    13748 
    13749 DECL_FORCE_INLINE_THROW(uint32_t)
    13750 iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
    13751 {
    13752     /* Use16BitSp: */
    13753 #ifdef RT_ARCH_AMD64
    13754     off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
    13755     off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
    13756 #else
    13757     /* sub regeff, regrsp, #cbMem */
    13758     pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
    13759     /* and regeff, regeff, #0xffff */
    13760     Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
    13761     pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0,  false /*f64Bit*/);
    13762     /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0. */
    13763     pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
    13764 #endif
    13765     return off;
    13766 }
    13767 
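/* Semantics sketch (not part of the changeset) of iemNativeEmitStackPushUse16Sp
   in plain C: only SP (bits 15:0 of RSP) is decremented, the upper RSP bits are
   preserved, and the zero-extended result is the effective store address. */
#if 0
static void stackPushUse16SpSemantics(uint64_t *puRsp, uint64_t *puEffSp, uint8_t cbMem)
{
    uint16_t const uNewSp = (uint16_t)(*puRsp - cbMem);          /* the sub + and steps */
    *puEffSp = uNewSp;                                           /* zero-extended effective SP */
    *puRsp   = (*puRsp & ~(uint64_t)0xffff) | uNewSp;            /* the bfi merge step */
}
#endif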
    13768 
    13769 DECL_FORCE_INLINE(uint32_t)
    13770 iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
    13771 {
    13772     /* Use32BitSp: */
    13773     off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
    13774     off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
    13775     return off;
    13776 }
    13777 
    13778 
    13779 /** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
    13780 DECL_INLINE_THROW(uint32_t)
    13781 iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
    13782                        uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
    13783 {
    13784     /*
    13785      * Assert sanity.
    13786      */
    13787     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
    13788     PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
    13789 #ifdef VBOX_STRICT
    13790     if (RT_BYTE2(cBitsVarAndFlat) != 0)
    13791     {
    13792         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    13793                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    13794                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    13795         Assert(   pfnFunction
    13796                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
    13797                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
    13798                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
    13799                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
    13800                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
    13801                    : UINT64_C(0xc000b000a0009000) ));
    13802     }
    13803     else
    13804         Assert(   pfnFunction
    13805                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
    13806                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
    13807                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
    13808                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
    13809                    : UINT64_C(0xc000b000a0009000) ));
    13810 #endif
    13811 
    13812 #ifdef VBOX_STRICT
    13813     /*
    13814      * Check that the fExec flags we've got make sense.
    13815      */
    13816     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    13817 #endif
    13818 
    13819     /*
    13820      * To keep things simple we have to commit any pending writes first as we
    13821      * may end up making calls.
    13822      */
    13823     /** @todo we could postpone this till we make the call and reload the
    13824      * registers after returning from the call. Not sure if that's sensible or
    13825      * not, though. */
    13826     off = iemNativeRegFlushPendingWrites(pReNative, off);
    13827 
    13828     /*
    13829      * First we calculate the new RSP and the effective stack pointer value.
    13830      * For 64-bit mode and flat 32-bit these two are the same.
    13831      * (Code structure is very similar to that of POP)
    13832      */
    13833     uint8_t const cbMem       = RT_BYTE1(cBitsVarAndFlat) / 8;
    13834     bool const    fIsSegReg   = RT_BYTE3(cBitsVarAndFlat) != 0;
    13835     bool const    fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
    13836     uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
    13837                               ? cbMem : sizeof(uint16_t);
    13838     uint8_t const cBitsFlat   = RT_BYTE2(cBitsVarAndFlat);      RT_NOREF(cBitsFlat);
    13839     uint8_t const idxRegRsp   = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
    13840                                                                 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
    13841     uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
    13842     uint32_t      offFixupJumpToUseOtherBitSp = UINT32_MAX;
    13843     if (cBitsFlat != 0)
    13844     {
    13845         Assert(idxRegEffSp == idxRegRsp);
    13846         Assert(cBitsFlat == 32 || cBitsFlat == 64);
    13847         Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
    13848         if (cBitsFlat == 64)
    13849             off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
    13850         else
    13851             off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
    13852     }
    13853     else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
    13854     {
    13855         Assert(idxRegEffSp != idxRegRsp);
    13856         uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
    13857                                                                      kIemNativeGstRegUse_ReadOnly);
    13858 #ifdef RT_ARCH_AMD64
    13859         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    13860 #else
    13861         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    13862 #endif
    13863         off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
    13864         iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
    13865         offFixupJumpToUseOtherBitSp = off;
    13866         if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
    13867         {
    13868             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
    13869             off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    13870         }
    13871         else
    13872         {
    13873             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
    13874             off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    13875         }
    13876         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    13877     }
    13878     /* SpUpdateEnd: */
    13879     uint32_t const offLabelSpUpdateEnd = off;
    13880 
    13881     /*
    13882      * Okay, now prepare for the TLB lookup and jump to the TlbLookup code
    13883      * (or to TlbMiss if we're skipping the lookup).
    13884      */
    13885     uint8_t const  iSegReg           = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
    13886     IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
    13887     uint16_t const uTlbSeqNo         = pReNative->uTlbSeqNo++;
    13888     uint32_t const idxLabelTlbMiss   = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
    13889     uint32_t const idxLabelTlbLookup = !TlbState.fSkip
    13890                                      ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
    13891                                      : UINT32_MAX;
    13892     uint8_t const  idxRegValue       =    !TlbState.fSkip
    13893                                        && pVarValue->enmKind != kIemNativeVarKind_Immediate
    13894                                      ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
    13895                                                                    IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
    13896                                      : UINT8_MAX;
    13897     uint8_t const  idxRegMemResult   = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
    13898 
    13899 
    13900     if (!TlbState.fSkip)
    13901         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
    13902     else
    13903         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
    13904 
    13905     /*
    13906      * Use16BitSp:
    13907      */
    13908     if (cBitsFlat == 0)
    13909     {
    13910 #ifdef RT_ARCH_AMD64
    13911         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    13912 #else
    13913         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    13914 #endif
    13915         iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
    13916         if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
    13917             off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    13918         else
    13919             off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    13920         off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
    13921         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    13922     }
    13923 
    13924     /*
    13925      * TlbMiss:
    13926      *
    13927      * Call helper to do the pushing.
    13928      */
    13929     iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
    13930 
    13931 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    13932     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    13933 #else
    13934     RT_NOREF(idxInstr);
    13935 #endif
    13936 
    13937     /* Save variables in volatile registers. */
    13938     uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
    13939                                      | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
    13940                                      | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
    13941                                      | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
    13942     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    13943 
    13944     if (   idxRegValue == IEMNATIVE_CALL_ARG1_GREG
    13945         && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
    13946     {
    13947         /* Swap them using ARG0 as temp register: */
    13948         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
    13949         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
    13950         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
    13951     }
    13952     else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
    13953     {
    13954         /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
    13955         off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
    13956                                                         0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    13957 
    13958         /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
    13959         if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
    13960             off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
    13961     }
    13962     else
    13963     {
    13964         /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
    13965         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
    13966 
    13967         /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
    13968         off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
    13969                                                         IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
    13970     }
    13971 
    13972     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    13973     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    13974 
    13975     /* Done setting up parameters, make the call. */
    13976     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    13977 
    13978     /* Restore variables and guest shadow registers to volatile registers. */
    13979     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
    13980     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
    13981 
    13982 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    13983     if (!TlbState.fSkip)
    13984     {
    13985         /* end of TlbMiss - Jump to the done label. */
    13986         uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    13987         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
    13988 
    13989         /*
    13990          * TlbLookup:
    13991          */
    13992         off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
    13993                                            IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
    13994 
    13995         /*
    13996          * Emit code to do the actual storing / fetching.
    13997          */
    13998         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
    13999 # ifdef VBOX_WITH_STATISTICS
    14000         off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
    14001                                                   RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
    14002 # endif
    14003         if (idxRegValue != UINT8_MAX)
    14004         {
    14005             switch (cbMemAccess)
    14006             {
    14007                 case 2:
    14008                     off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
    14009                     break;
    14010                 case 4:
    14011                     if (!fIsIntelSeg)
    14012                         off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
    14013                     else
    14014                     {
    14015                         /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
    14016                            PUSH FS in real mode, so we have to try to emulate that here.
    14017                            We borrow the now unused idxReg1 from the TLB lookup code here. */
    14018                         uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
    14019                                                                                             kIemNativeGstReg_EFlags);
    14020                         if (idxRegEfl != UINT8_MAX)
    14021                         {
    14022 #ifdef RT_ARCH_AMD64
    14023                             off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
    14024                             off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
    14025                                                              UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
    14026 #else
    14027                             off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
    14028                                                                   off, TlbState.idxReg1, idxRegEfl,
    14029                                                                   UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
    14030 #endif
    14031                             iemNativeRegFreeTmp(pReNative, idxRegEfl);
    14032                         }
    14033                         else
    14034                         {
    14035                             off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
    14036                                                                   RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
    14037                             off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
    14038                                                              UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
    14039                         }
    14040                         /* ASSUMES the upper half of idxRegValue is ZERO. */
    14041                         off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
    14042                         off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
    14043                     }
    14044                     break;
    14045                 case 8:
    14046                     off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
    14047                     break;
    14048                 default:
    14049                     AssertFailed();
    14050             }
    14051         }
    14052         else
    14053         {
    14054             switch (cbMemAccess)
    14055             {
    14056                 case 2:
    14057                     off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
    14058                                                          idxRegMemResult, TlbState.idxReg1);
    14059                     break;
    14060                 case 4:
    14061                     Assert(!fIsSegReg);
    14062                     off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
    14063                                                          idxRegMemResult, TlbState.idxReg1);
    14064                     break;
    14065                 case 8:
    14066                     off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
    14067                     break;
    14068                 default:
    14069                     AssertFailed();
    14070             }
    14071         }
    14072 
    14073         iemNativeRegFreeTmp(pReNative, idxRegMemResult);
    14074         TlbState.freeRegsAndReleaseVars(pReNative);
    14075 
    14076         /*
    14077          * TlbDone:
    14078          *
    14079          * Commit the new RSP value.
    14080          */
    14081         iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    14082     }
    14083 #endif /* IEMNATIVE_WITH_TLB_LOOKUP */
    14084 
    14085     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
    14086     iemNativeRegFreeTmp(pReNative, idxRegRsp);
    14087     if (idxRegEffSp != idxRegRsp)
    14088         iemNativeRegFreeTmp(pReNative, idxRegEffSp);
    14089 
    14090     /* The value variable is implicitly flushed. */
    14091     if (idxRegValue != UINT8_MAX)
    14092         iemNativeVarRegisterRelease(pReNative, idxVarValue);
    14093     iemNativeVarFreeLocal(pReNative, idxVarValue);
    14094 
    14095     return off;
    14096 }
    14097 
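/* Semantics sketch (not part of the changeset) of the Intel real mode segment
   push quirk handled in the TlbLookup path above: the upper word of the pushed
   dword comes from EFLAGS bits 31:16 (sans the read-as-zero bits) instead of
   being zero. */
#if 0
static uint32_t intelRealModeSegPushValue(uint32_t fEfl, uint16_t uSel)
{
    return (fEfl & UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK) | uSel;
}
#endif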
    14098 
    14099 
    14100 /*                                                     RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
    14101 #define IEM_MC_POP_GREG_U16(a_iGReg) \
    14102     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16,  0, 0, 0), \
    14103                                     (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
    14104 #define IEM_MC_POP_GREG_U32(a_iGReg) \
    14105     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32,  0, 0, 0), \
    14106                                     (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
    14107 #define IEM_MC_POP_GREG_U64(a_iGReg) \
    14108     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64,  0, 0, 0), \
    14109                                     (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
    14110 
    14111 #define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
    14112     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
    14113                                     (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
    14114 #define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
    14115     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
    14116                                     (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
    14117 
    14118 #define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
    14119     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
    14120                                     (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
    14121 #define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
    14122     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
    14123                                     (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
    14124 
    14125 
    14126 DECL_FORCE_INLINE_THROW(uint32_t)
    14127 iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
    14128                              uint8_t idxRegTmp)
    14129 {
    14130     /* Use16BitSp: */
    14131 #ifdef RT_ARCH_AMD64
    14132     off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
    14133     off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
    14134     RT_NOREF(idxRegTmp);
    14135 #else
    14136     /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
    14137     pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
    14138     /* add tmp, regrsp, #cbMem */
    14139     pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
    14140     /* and tmp, tmp, #0xffff */
    14141     Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
    14142     pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0,  false /*f64Bit*/);
    14143     /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
    14144     pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
    14145 #endif
    14146     return off;
    14147 }
    14148 
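/* Semantics sketch (not part of the changeset): the 16-bit POP counterpart. The
   effective load address comes from the old SP, and only bits 15:0 of RSP are
   incremented afterwards. */
#if 0
static void stackPopUse16SpSemantics(uint64_t *puRsp, uint64_t *puEffSp, uint8_t cbMem)
{
    *puEffSp = (uint16_t)*puRsp;                                 /* the ubfiz step */
    uint16_t const uNewSp = (uint16_t)(*puRsp + cbMem);          /* the add + and steps */
    *puRsp   = (*puRsp & ~(uint64_t)0xffff) | uNewSp;            /* the bfi merge step */
}
#endif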
    14149 
    14150 DECL_FORCE_INLINE(uint32_t)
    14151 iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
    14152 {
    14153     /* Use32BitSp: */
    14154     off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
    14155     off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
    14156     return off;
    14157 }
    14158 
    14159 
    14160 /** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
    14161 DECL_INLINE_THROW(uint32_t)
    14162 iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
    14163                           uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
    14164 {
    14165     /*
    14166      * Assert sanity.
    14167      */
    14168     Assert(idxGReg < 16);
    14169 #ifdef VBOX_STRICT
    14170     if (RT_BYTE2(cBitsVarAndFlat) != 0)
    14171     {
    14172         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    14173                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    14174                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    14175         Assert(   pfnFunction
    14176                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
    14177                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
    14178                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
    14179                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
    14180                    : UINT64_C(0xc000b000a0009000) ));
    14181     }
    14182     else
    14183         Assert(   pfnFunction
    14184                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
    14185                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
    14186                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
    14187                    : UINT64_C(0xc000b000a0009000) ));
    14188 #endif
    14189 
    14190 #ifdef VBOX_STRICT
    14191     /*
    14192      * Check that the fExec flags we've got make sense.
    14193      */
    14194     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    14195 #endif
    14196 
    14197     /*
    14198      * To keep things simple we have to commit any pending writes first as we
    14199      * may end up making calls.
    14200      */
    14201     off = iemNativeRegFlushPendingWrites(pReNative, off);
    14202 
    14203     /*
    14204      * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
    14205      * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
    14206      * directly as the effective stack pointer.
    14207      * (Code structure is very similar to that of PUSH)
    14208      */
    14209     uint8_t const cbMem           = RT_BYTE1(cBitsVarAndFlat) / 8;
    14210     uint8_t const cBitsFlat       = RT_BYTE2(cBitsVarAndFlat);      RT_NOREF(cBitsFlat);
    14211     uint8_t const idxRegRsp       = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
    14212                                                                     kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
    14213     uint8_t const idxRegEffSp     = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
    14214     /** @todo can do a better job picking the register here. For cbMem >= 4 this
    14215      *        will be the resulting register value. */
    14216     uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too.  */
    14217 
    14218     uint32_t      offFixupJumpToUseOtherBitSp = UINT32_MAX;
    14219     if (cBitsFlat != 0)
    14220     {
    14221         Assert(idxRegEffSp == idxRegRsp);
    14222         Assert(cBitsFlat == 32 || cBitsFlat == 64);
    14223         Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
    14224     }
    14225     else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
    14226     {
    14227         Assert(idxRegEffSp != idxRegRsp);
    14228         uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
    14229                                                                      kIemNativeGstRegUse_ReadOnly);
    14230 #ifdef RT_ARCH_AMD64
    14231         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    14232 #else
    14233         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    14234 #endif
    14235         off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
    14236         iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
    14237         offFixupJumpToUseOtherBitSp = off;
    14238         if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
    14239         {
    14240 /** @todo can skip idxRegRsp updating when popping ESP.   */
    14241             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
    14242             off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    14243         }
    14244         else
    14245         {
    14246             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
    14247             off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
    14248         }
    14249         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    14250     }
    14251     /* SpUpdateEnd: */
    14252     uint32_t const offLabelSpUpdateEnd = off;
    14253 
    14254     /*
    14255      * Okay, now prepare for the TLB lookup and jump to the TlbLookup code
    14256      * (or to TlbMiss if we're skipping the lookup).
    14257      */
    14258     uint8_t const  iSegReg           = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
    14259     IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
    14260     uint16_t const uTlbSeqNo         = pReNative->uTlbSeqNo++;
    14261     uint32_t const idxLabelTlbMiss   = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
    14262     uint32_t const idxLabelTlbLookup = !TlbState.fSkip
    14263                                      ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
    14264                                      : UINT32_MAX;
    14265 
    14266     if (!TlbState.fSkip)
    14267         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
    14268     else
    14269         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
    14270 
    14271     /*
    14272      * Use16BitSp:
    14273      */
    14274     if (cBitsFlat == 0)
    14275     {
    14276 #ifdef RT_ARCH_AMD64
    14277         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    14278 #else
    14279         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    14280 #endif
    14281         iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
    14282         if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
    14283             off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
    14284         else
    14285             off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    14286         off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
    14287         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    14288     }
    14289 
    14290     /*
    14291      * TlbMiss:
    14292      *
    14293      * Call helper to do the popping.
    14294      */
    14295     iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
    14296 
    14297 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    14298     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    14299 #else
    14300     RT_NOREF(idxInstr);
    14301 #endif
    14302 
    14303     uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
    14304                                      | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
    14305                                      | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
    14306     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    14307 
    14308 
    14309     /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
    14310     if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
    14311         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
    14312 
    14313     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    14314     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    14315 
    14316     /* Done setting up parameters, make the call. */
    14317     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    14318 
    14319     /* Move the return register content to idxRegMemResult. */
    14320     if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
    14321         off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
    14322 
    14323     /* Restore variables and guest shadow registers to volatile registers. */
    14324     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
    14325     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
    14326 
    14327 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    14328     if (!TlbState.fSkip)
    14329     {
    14330         /* end of TlbMiss - Jump to the done label. */
    14331         uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    14332         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
    14333 
    14334         /*
    14335          * TlbLookup:
    14336          */
    14337         off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
    14338                                            idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
    14339 
    14340         /*
    14341      * Emit code to load the value (replacing the address in idxRegMemResult with the value read).
    14342          */
    14343         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    14344 # ifdef VBOX_WITH_STATISTICS
    14345         off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
    14346                                                   RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
    14347 # endif
    14348         switch (cbMem)
    14349         {
    14350             case 2:
    14351                 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
    14352                 break;
    14353             case 4:
    14354                 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
    14355                 break;
    14356             case 8:
    14357                 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
    14358                 break;
    14359             default:
    14360                 AssertFailed();
    14361         }
    14362 
    14363         TlbState.freeRegsAndReleaseVars(pReNative);
    14364 
    14365         /*
    14366          * TlbDone:
    14367          *
    14368      * Set the new RSP value (FLAT accesses need to calculate it first) and
    14369          * commit the popped register value.
    14370          */
    14371         iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    14372     }
    14373 #endif /* IEMNATIVE_WITH_TLB_LOOKUP */
    14374 
    14375     if (idxGReg != X86_GREG_xSP)
    14376     {
    14377         /* Set the register. */
    14378         if (cbMem >= sizeof(uint32_t))
    14379         {
    14380 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    14381             AssertMsg(   pReNative->idxCurCall == 0
    14382                       || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
    14383                       ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
    14384 #endif
    14385             iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult,  IEMNATIVEGSTREG_GPR(idxGReg), off);
    14386             off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
    14387                                                  RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
    14388         }
    14389         else
    14390         {
    14391             Assert(cbMem == sizeof(uint16_t));
    14392             uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
    14393                                                                       kIemNativeGstRegUse_ForUpdate);
    14394             off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
    14395             off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
    14396             iemNativeRegFreeTmp(pReNative, idxRegDst);
    14397         }
    14398 
    14399         /* Complete RSP calculation for FLAT mode. */
    14400         if (idxRegEffSp == idxRegRsp)
    14401         {
    14402             if (cBitsFlat == 64)
    14403                 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
    14404             else
    14405                 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
    14406         }
    14407     }
    14408     else
    14409     {
    14410         /* We're popping RSP, ESP or SP. Only the 16-bit (SP) case needs a bit of extra work, of course. */
    14411         if (cbMem == sizeof(uint64_t))
    14412             off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
    14413         else if (cbMem == sizeof(uint32_t))
    14414             off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
    14415         else
    14416         {
    14417             if (idxRegEffSp == idxRegRsp)
    14418             {
    14419                 if (cBitsFlat == 64)
    14420                     off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
    14421                 else
    14422                     off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
    14423             }
    14424             off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
    14425         }
    14426     }
    14427     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
    14428 
    14429     iemNativeRegFreeTmp(pReNative, idxRegRsp);
    14430     if (idxRegEffSp != idxRegRsp)
    14431         iemNativeRegFreeTmp(pReNative, idxRegEffSp);
    14432     iemNativeRegFreeTmp(pReNative, idxRegMemResult);
    14433 
    14434     return off;
    14435 }
    14436 
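/* Simplified sketch (not part of the changeset) of the POP-into-SP special case
   above: the popped value replaces the relevant part of the stack pointer, so
   the usual post-increment is skipped for 32/64-bit pops, while a 16-bit POP SP
   keeps the incremented upper RSP bits and merges the value into bits 15:0.
   cbIncr stands in for whatever increment the surrounding mode applies. */
#if 0
static void popIntoSpSemantics(uint64_t *puRsp, uint64_t uPopped, uint8_t cbMem, uint8_t cbIncr)
{
    if (cbMem == 8)
        *puRsp = uPopped;                                              /* POP RSP */
    else if (cbMem == 4)
        *puRsp = (uint32_t)uPopped;                                    /* POP ESP */
    else
        *puRsp = ((*puRsp + cbIncr) & ~(uint64_t)0xffff) | (uint16_t)uPopped; /* POP SP */
}
#endif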
    14437 
    14438 
    14439 /*********************************************************************************************************************************
    14440 *   Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX).                                                                      *
    14441 *********************************************************************************************************************************/
    14442 
    14443 #define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14444     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    14445                                     IEM_ACCESS_DATA_ATOMIC,  0 /*fAlignMask*/, \
    14446                                     (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
    14447 
    14448 #define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14449     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    14450                                     IEM_ACCESS_DATA_RW,  0 /*fAlignMask*/, \
    14451                                     (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
    14452 
    14453 #define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14454     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    14455                                     IEM_ACCESS_DATA_W,  0 /*fAlignMask*/, \
    14456                                     (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
    14457 
    14458 #define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14459     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    14460                                     IEM_ACCESS_DATA_R,  0 /*fAlignMask*/, \
    14461                                     (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
    14462 
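/* Note on the fAlignMask convention (sketch, not part of the changeset): natural
   alignment is passed as sizeof(type) - 1, i.e. the low address bits that the
   alignment check tests (0 for bytes, 1 for words, 3 for dwords, 7 for qwords): */
#if 0
static bool isNaturallyAligned(uint64_t GCPtrMem, uint8_t cbMem)
{
    uint8_t const fAlignMask = cbMem - 1;  /* cbMem is a power of two here */
    return (GCPtrMem & fAlignMask) == 0;
}
#endif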
    14463 
    14464 #define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14465     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    14466                                     IEM_ACCESS_DATA_ATOMIC,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    14467                                     (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
    14468 
    14469 #define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14470     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    14471                                     IEM_ACCESS_DATA_RW,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    14472                                     (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
    14473 
    14474 #define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14475     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    14476                                     IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    14477                                     (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr)
    14478 
    14479 #define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14480     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    14481                                     IEM_ACCESS_DATA_R,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    14482                                     (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
    14483 
    14484 #define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14485     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
    14486                                     IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    14487                                     (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr)
    14488 
    14489 
    14490 #define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14491     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    14492                                     IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    14493                                     (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
    14494 
    14495 #define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14496     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    14497                                     IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    14498                                     (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
    14499 
    14500 #define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14501     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    14502                                     IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    14503                                     (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr)
    14504 
    14505 #define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14506     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    14507                                     IEM_ACCESS_DATA_R,  sizeof(uint32_t) - 1 /*fAlignMask*/, \
    14508                                     (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
    14509 
    14510 #define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14511     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
    14512                                     IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    14513                                     (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr)
    14514 
    14515 
    14516 #define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14517     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    14518                                     IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    14519                                     (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
    14520 
    14521 #define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14522     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    14523                                     IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    14524                                     (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
    14525 #define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14526     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    14527                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    14528                                     (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr)
    14529 
    14530 #define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14531     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    14532                                     IEM_ACCESS_DATA_R,  sizeof(uint64_t) - 1 /*fAlignMask*/, \
    14533                                     (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
    14534 
    14535 #define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14536     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
    14537                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    14538                                     (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr)
    14539 
    14540 
    14541 #define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14542     off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
    14543                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    14544                                     (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr)
    14545 
    14546 #define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14547     off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
    14548                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
    14549                                     (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr)
    14550 
    14551 
    14552 #define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14553     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
    14554                                     IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    14555                                     (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
    14556 
    14557 #define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14558     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
    14559                                     IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    14560                                     (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
    14561 
    14562 #define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14563     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
    14564                                     IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    14565                                     (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr)
    14566 
    14567 #define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    14568     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
    14569                                     IEM_ACCESS_DATA_R,  sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    14570                                     (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
    14571 
    14572 
    14573 
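/*
 * The IEM_MC_MEM_FLAT_MAP_XXX variants below pass UINT8_MAX as the segment
 * register index, which iemNativeEmitMemMapCommon treats as "the address is
 * already linear" (64-bit or flat 32-bit modes, see the VBOX_STRICT asserts
 * in that function). A minimal sketch of the resulting dispatch, assuming
 * only what the helper names imply:
 *
 *     if (iSegReg == UINT8_MAX)   // flat: no segment base/limit applied
 *         helper = iemNativeHlpMemFlatMapDataXxx;
 *     else                        // segmented: helper applies base + checks
 *         helper = iemNativeHlpMemMapDataXxx;
 */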
    14574 #define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    14575     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    14576                                     IEM_ACCESS_DATA_ATOMIC,  0 /*fAlignMask*/, \
    14577                                     (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
    14578 
    14579 #define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    14580     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    14581                                     IEM_ACCESS_DATA_RW,  0 /*fAlignMask*/, \
    14582                                     (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
    14583 
    14584 #define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    14585     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    14586                                     IEM_ACCESS_DATA_W,  0 /*fAlignMask*/, \
    14587                                     (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr)
    14588 
    14589 #define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    14590     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    14591                                     IEM_ACCESS_DATA_R,  0 /*fAlignMask*/, \
    14592                                     (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
    14593 
    14594 
    14595 #define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    14596     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    14597                                     IEM_ACCESS_DATA_ATOMIC,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    14598                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
    14599 
    14600 #define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    14601     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    14602                                     IEM_ACCESS_DATA_RW,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    14603                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
    14604 
    14605 #define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    14606     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    14607                                     IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    14608                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr)
    14609 
    14610 #define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    14611     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    14612                                     IEM_ACCESS_DATA_R,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    14613                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
    14614 
    14615 #define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
    14616     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
    14617                                     IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    14618                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr)
    14619 
    14620 
    14621 #define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    14622     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    14623                                     IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    14624                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
    14625 
    14626 #define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    14627     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    14628                                     IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    14629                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
    14630 
    14631 #define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    14632     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    14633                                     IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    14634                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr)
    14635 
    14636 #define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    14637     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    14638                                     IEM_ACCESS_DATA_R,  sizeof(uint32_t) - 1 /*fAlignMask*/, \
    14639                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
    14640 
    14641 #define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
    14642     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
    14643                                     IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    14644                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr)
    14645 
    14646 
    14647 #define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    14648     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    14649                                     IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    14650                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
    14651 
    14652 #define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    14653     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    14654                                     IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    14655                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
    14656 
    14657 #define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    14658     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    14659                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    14660                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr)
    14661 
    14662 #define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    14663     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    14664                                     IEM_ACCESS_DATA_R,  sizeof(uint64_t) - 1 /*fAlignMask*/, \
    14665                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
    14666 
    14667 #define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
    14668     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
    14669                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    14670                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr)
    14671 
    14672 
    14673 #define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
    14674     off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
    14675                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    14676                                     (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr)
    14677 
    14678 #define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
    14679     off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
    14680                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
    14681                                     (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr)
    14682 
    14683 
    14684 #define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    14685     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
    14686                                     IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    14687                                     (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
    14688 
    14689 #define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    14690     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
    14691                                     IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    14692                                     (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
    14693 
    14694 #define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    14695     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
    14696                                     IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    14697                                     (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr)
    14698 
    14699 #define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    14700     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
    14701                                     IEM_ACCESS_DATA_R,  sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    14702                                     (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
    14703 
    14704 
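/*
 * A note on the fAlignMask arguments above: natural alignment is encoded as
 * sizeof(type) - 1, and the inlined TLB lookup treats any address with bits
 * set in that mask as a candidate for the slow path. An illustrative sketch
 * only (the actual test is emitted as native code by iemNativeEmitTlbLookup):
 *
 *     if (GCPtrMem & fAlignMask)      // e.g. 3 for a uint32_t access
 *         goto TlbMiss;               // helper raises #AC / #GP as needed
 */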
    14705 DECL_INLINE_THROW(uint32_t)
    14706 iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
    14707                           uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
    14708                           uintptr_t pfnFunction, uint8_t idxInstr)
    14709 {
    14710     /*
    14711      * Assert sanity.
    14712      */
    14713     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
    14714     PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
    14715     AssertStmt(   pVarMem->enmKind == kIemNativeVarKind_Invalid
    14716                && pVarMem->cbVar   == sizeof(void *),
    14717                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    14718 
    14719     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
    14720     PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
    14721     AssertStmt(   pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
    14722                && pVarUnmapInfo->cbVar   == sizeof(uint8_t),
    14723                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    14724 
    14725     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
    14726     PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
    14727     AssertStmt(   pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
    14728                || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
    14729                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    14730 
    14731     Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
    14732 
    14733     AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
    14734 
    14735 #ifdef VBOX_STRICT
    14736 # define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
    14737         (  ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
    14738          ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
    14739          : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
    14740          ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
    14741 # define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
    14742         (  ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
    14743          ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
    14744          : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
    14745 
    14746     if (iSegReg == UINT8_MAX)
    14747     {
    14748         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    14749                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    14750                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    14751         switch (cbMem)
    14752         {
    14753             case 1:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
    14754             case 2:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
    14755             case 4:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
    14756             case 8:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
    14757             case 10:
    14758                 Assert(   pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
    14759                        || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
    14760                 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
    14761                 break;
    14762             case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
    14763 # if 0
    14764             case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
    14765             case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
    14766 # endif
    14767             default: AssertFailed(); break;
    14768         }
    14769     }
    14770     else
    14771     {
    14772         Assert(iSegReg < 6);
    14773         switch (cbMem)
    14774         {
    14775             case 1:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
    14776             case 2:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
    14777             case 4:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
    14778             case 8:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
    14779             case 10:
    14780                 Assert(   pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
    14781                        || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
    14782                 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
    14783                 break;
    14784             case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
    14785 # if 0
    14786             case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
    14787             case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
    14788 # endif
    14789             default: AssertFailed(); break;
    14790         }
    14791     }
    14792 # undef IEM_MAP_HLP_FN
    14793 # undef IEM_MAP_HLP_FN_NO_AT
    14794 #endif
    14795 
    14796 #ifdef VBOX_STRICT
    14797     /*
    14798      * Check that the fExec flags we've got make sense.
    14799      */
    14800     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    14801 #endif
    14802 
    14803     /*
    14804      * To keep things simple we have to commit any pending writes first as we
    14805      * may end up making calls.
    14806      */
    14807     off = iemNativeRegFlushPendingWrites(pReNative, off);
    14808 
    14809 #ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    14810     /*
    14811      * Move/spill/flush stuff out of call-volatile registers.
    14812      * This is the easy way out. We could contain this to the tlb-miss branch
    14813      * by saving and restoring active stuff here.
    14814      */
    14815     /** @todo save+restore active registers and maybe guest shadows in tlb-miss.  */
    14816     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
    14817 #endif
    14818 
    14819     /* The bUnmapInfo variable will get a register in the tlb-hit code path,
    14820        while the tlb-miss code path will temporarily put it on the stack.
    14821        Set the type to stack here so we don't need to do it twice below. */
    14822     iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
    14823     uint8_t const idxRegUnmapInfo   = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
    14824     /** @todo use a tmp register from TlbState, since they'll be free after tlb
    14825      *        lookup is done. */
    14826 
    14827     /*
    14828      * Define labels and allocate the result register (trying for the return
    14829      * register if we can).
    14830      */
    14831     uint16_t const uTlbSeqNo         = pReNative->uTlbSeqNo++;
    14832     uint8_t  const idxRegMemResult   = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
    14833                                      ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
    14834                                      : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
    14835     IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
    14836     uint32_t const idxLabelTlbLookup = !TlbState.fSkip
    14837                                      ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
    14838                                      : UINT32_MAX;
    14839 //off=iemNativeEmitBrk(pReNative, off, 0);
    14840     /*
    14841      * Jump to the TLB lookup code.
    14842      */
    14843     if (!TlbState.fSkip)
    14844         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
    14845 
    14846     /*
    14847      * TlbMiss:
    14848      *
    14849      * Call helper to do the fetching.
    14850      * We flush all guest register shadow copies here.
    14851      */
    14852     uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
    14853 
    14854 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    14855     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    14856 #else
    14857     RT_NOREF(idxInstr);
    14858 #endif
    14859 
    14860 #ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    14861     /* Save variables in volatile registers. */
    14862     uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
    14863     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    14864 #endif
    14865 
    14866     /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
    14867     off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
    14868 #ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    14869                                                     IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
    14870 #else
    14871                                                     IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    14872 #endif
    14873 
    14874     /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
    14875     if (iSegReg != UINT8_MAX)
    14876     {
    14877         AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
    14878         off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
    14879     }
    14880 
    14881     /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
    14882     int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
    14883     off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
    14884 
    14885     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    14886     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    14887 
    14888     /* Done setting up parameters, make the call. */
    14889     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    14890 
    14891     /*
    14892      * Put the output in the right registers.
    14893      */
    14894     Assert(idxRegMemResult == pVarMem->idxReg);
    14895     if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
    14896         off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
    14897 
    14898 #ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    14899     /* Restore variables and guest shadow registers to volatile registers. */
    14900     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
    14901     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
    14902 #endif
    14903 
    14904     Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
    14905     off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
    14906 
    14907 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    14908     if (!TlbState.fSkip)
    14909     {
    14910         /* End of TlbMiss - jump to the done label. */
    14911         uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    14912         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
    14913 
    14914         /*
    14915          * TlbLookup:
    14916          */
    14917         off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
    14918                                            idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
    14919 # ifdef VBOX_WITH_STATISTICS
    14920         off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
    14921                                                 RT_UOFFSETOF(VMCPUCC,  iem.s.StatNativeTlbHitsForMapped));
    14922 # endif
    14923 
    14924         /* [idxVarUnmapInfo] = 0; */
    14925         off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
    14926 
    14927         /*
    14928          * TlbDone:
    14929          */
    14930         iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    14931 
    14932         TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
    14933 
    14934 # ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    14935         /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
    14936         iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    14937 # endif
    14938     }
    14939 #else
    14940     RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
    14941 #endif
    14942 
    14943     iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
    14944     iemNativeVarRegisterRelease(pReNative, idxVarMem);
    14945 
    14946     return off;
    14947 }
    14948 
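/*
 * For orientation, the native code laid out by iemNativeEmitMemMapCommon has
 * roughly this shape (a sketch; TlbLookup/TlbMiss/TlbDone are the labels
 * created above, matched up via uTlbSeqNo):
 *
 *         jmp     TlbLookup           ; omitted when TlbState.fSkip
 *     TlbMiss:
 *         ; save volatile regs, load pVCpu, &bUnmapInfo, GCPtrMem, iSegReg
 *         call    pfnFunction         ; e.g. iemNativeHlpMemMapDataU16Rw
 *         ; move return value into idxRegMemResult, reload bUnmapInfo
 *         jmp     TlbDone
 *     TlbLookup:
 *         ; inline TLB probe, jumps back to TlbMiss on miss or misalignment;
 *         ; on a hit idxRegMemResult ends up holding the host address
 *         mov     idxRegUnmapInfo, 0  ; nothing to unmap on a TLB hit
 *     TlbDone:
 */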
    14949 
    14950 #define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
    14951     off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
    14952                                          (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
    14953 
    14954 #define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
    14955     off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
    14956                                          (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
    14957 
    14958 #define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
    14959     off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
    14960                                          (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
    14961 
    14962 #define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
    14963     off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
    14964                                          (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
    14965 
    14966 DECL_INLINE_THROW(uint32_t)
    14967 iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
    14968                                uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
    14969 {
    14970     /*
    14971      * Assert sanity.
    14972      */
    14973     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
    14974 #if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
    14975     PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
    14976 #endif
    14977     Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
    14978     Assert(   pVarUnmapInfo->idxReg       < RT_ELEMENTS(pReNative->Core.aHstRegs)
    14979            || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
    14980 #ifdef VBOX_STRICT
    14981     switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
    14982     {
    14983         case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
    14984             Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
    14985         case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
    14986             Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
    14987         case IEM_ACCESS_TYPE_WRITE:
    14988             Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
    14989         case IEM_ACCESS_TYPE_READ:
    14990             Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
    14991         default: AssertFailed();
    14992     }
    14993 #else
    14994     RT_NOREF(fAccess);
    14995 #endif
    14996 
    14997     /*
    14998      * To keep things simple we have to commit any pending writes first as we
    14999      * may end up making calls (there shouldn't be any at this point, so this
    15000      * is just for consistency).
    15001      */
    15002     /** @todo we could postpone this till we make the call and reload the
    15003      * registers after returning from the call. Not sure if that's sensible or
    15004      * not, though. */
    15005     off = iemNativeRegFlushPendingWrites(pReNative, off);
    15006 
    15007     /*
    15008      * Move/spill/flush stuff out of call-volatile registers.
    15009      *
    15010      * We exclude any register holding the bUnmapInfo variable, as we'll be
    15011      * checking it after returning from the call and will free it afterwards.
    15012      */
    15013     /** @todo save+restore active registers and maybe guest shadows in miss
    15014      *        scenario. */
    15015     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
    15016 
    15017     /*
    15018      * If the value in idxVarUnmapInfo is zero, we can skip all this. Otherwise
    15019      * we'll have to call the unmap helper function.
    15020      *
    15021      * The likelihood of it being zero is higher than for the TLB hit when doing
    15022      * the mapping, as a TLB miss for a well-aligned and unproblematic memory
    15023      * access should also end up with a mapping that won't need special unmapping.
    15024      */
    15025     /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case!  That
    15026      *        should speed up things for the pure interpreter as well when TLBs
    15027      *        are enabled. */
    15028 #ifdef RT_ARCH_AMD64
    15029     if (pVarUnmapInfo->idxReg == UINT8_MAX)
    15030     {
    15031         /* test byte [rbp - xxx], 0ffh  */
    15032         uint8_t * const pbCodeBuf    = iemNativeInstrBufEnsure(pReNative, off, 7);
    15033         pbCodeBuf[off++] = 0xf6;
    15034         uint8_t const   idxStackSlot = pVarUnmapInfo->idxStackSlot;
    15035         off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
    15036         pbCodeBuf[off++] = 0xff;
    15037         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    15038     }
    15039     else
    15040 #endif
    15041     {
    15042         uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
    15043                                                               true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
    15044         off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
    15045         iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
    15046     }
    15047     uint32_t const offJmpFixup = off;
    15048     off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
    15049 
    15050     /*
    15051      * Call the unmap helper function.
    15052      */
    15053 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
    15054     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    15055 #else
    15056     RT_NOREF(idxInstr);
    15057 #endif
    15058 
    15059     /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
    15060     off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
    15061                                                0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    15062 
    15063     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    15064     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    15065 
    15066     /* Done setting up parameters, make the call. */
    15067     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    15068 
    15069     /* The bUnmapInfo variable is implicitly freed by these MCs. */
    15070     iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
    15071 
    15072     /*
    15073      * Done, just fixup the jump for the non-call case.
    15074      */
    15075     iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
    15076 
    15077     return off;
    15078 }
    15079 
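/*
 * Typical MC-block usage of the map/commit pair handled by the two emitters
 * above (an illustrative sketch only; the locals and the decoder context are
 * hypothetical and the surrounding IEM_MC_BEGIN/END plumbing is elided):
 */
#if 0
    IEM_MC_LOCAL(uint16_t *,    pu16Dst);
    IEM_MC_LOCAL(uint8_t,       bUnmapInfo);
    IEM_MC_LOCAL(RTGCPTR,       GCPtrEffDst);
    /* ... calculate GCPtrEffDst from the decoded operand ... */
    IEM_MC_MEM_MAP_U16_RW(pu16Dst, bUnmapInfo, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
    /* ... read-modify-write *pu16Dst ... */
    IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
#endif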
    15080 
    15081 
    15082 /*********************************************************************************************************************************
    15083 *   State and Exceptions                                                                                                         *
    15084 *********************************************************************************************************************************/
    15085 
    15086 #define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE()     off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    15087 #define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ()       off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
    15088 
    15089 #define IEM_MC_PREPARE_SSE_USAGE()                  off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    15090 #define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE()     off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    15091 #define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ()       off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
    15092 
    15093 #define IEM_MC_PREPARE_AVX_USAGE()                  off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    15094 #define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE()     off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    15095 #define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ()       off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
    15096 
    15097 
    15098 DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
    15099 {
    15100     /** @todo this needs a lot more work later. */
    15101     RT_NOREF(pReNative, fForChange);
    15102     return off;
    15103 }
    15104 
    15105 
    15106 
    15107 /*********************************************************************************************************************************
    15108 *   Emitters for FPU related operations.                                                                                         *
    15109 *********************************************************************************************************************************/
    15110 
    15111 #define IEM_MC_FETCH_FCW(a_u16Fcw) \
    15112     off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
    15113 
    15114 /** Emits code for IEM_MC_FETCH_FCW. */
    15115 DECL_INLINE_THROW(uint32_t)
    15116 iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
    15117 {
    15118     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    15119     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
    15120 
    15121     uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    15122 
    15123     /* Allocate a temporary FCW register. */
    15124     /** @todo eliminate extra register   */
    15125     uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
    15126                                                               kIemNativeGstRegUse_ReadOnly);
    15127 
    15128     off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
    15129 
    15130     /* Free but don't flush the FCW register. */
    15131     iemNativeRegFreeTmp(pReNative, idxFcwReg);
    15132     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    15133 
    15134     return off;
    15135 }
    15136 
    15137 
    15138 #define IEM_MC_FETCH_FSW(a_u16Fsw) \
    15139     off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
    15140 
    15141 /** Emits code for IEM_MC_FETCH_FSW. */
    15142 DECL_INLINE_THROW(uint32_t)
    15143 iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
    15144 {
    15145     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    15146     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
    15147 
    15148     uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
    15149     /* Allocate a temporary FSW register. */
    15150     /** @todo eliminate extra register   */
    15151     uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
    15152                                                               kIemNativeGstRegUse_ReadOnly);
    15153 
    15154     off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
    15155 
    15156     /* Free but don't flush the FSW register. */
    15157     iemNativeRegFreeTmp(pReNative, idxFswReg);
    15158     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    15159 
    15160     return off;
    15161 }
    15162 
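/*
 * Sketched MC-block usage of the FCW/FSW fetches (illustrative only;
 * fnstcw/fnstsw style instructions are the typical consumers, the effective
 * address calculation and state actualization are elided):
 */
#if 0
    IEM_MC_LOCAL(uint16_t, u16Fcw);
    IEM_MC_FETCH_FCW(u16Fcw);
    IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Fcw);
#endif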
    15163 
    15164 
    15165 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    15166 
    15167 
    15168 /*********************************************************************************************************************************
    15169 *   Emitters for SSE/AVX specific operations.                                                                                    *
    15170 *********************************************************************************************************************************/
    15171 
    15172 #define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
    15173     off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
    15174 
    15175 /** Emits code for IEM_MC_COPY_XREG_U128. */
    15176 DECL_INLINE_THROW(uint32_t)
    15177 iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
    15178 {
    15179     /* Allocate destination and source register. */
    15180     uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
    15181                                                                           kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
    15182     uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
    15183                                                                           kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
    15184 
    15185     off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
    15186     IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXRegDst);
    15187     /* We don't need to write everything back here as the destination is marked as dirty and will be flushed automatically. */
    15188 
    15189     /* Free but don't flush the source and destination register. */
    15190     iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    15191     iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
    15192 
    15193     return off;
    15194 }
    15195 
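/*
 * Sketched MC-block usage (illustrative): register-to-register forms such as
 * movaps/movdqa reduce to a single copy plus the SSE usage preamble.
 */
#if 0
    IEM_MC_PREPARE_SSE_USAGE();
    IEM_MC_COPY_XREG_U128(IEM_GET_MODRM_REG(pVCpu, bRm), IEM_GET_MODRM_RM(pVCpu, bRm));
#endif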
    15196 
    15197 #define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
    15198     off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
    15199 
    15200 /** Emits code for IEM_MC_FETCH_XREG_U64. */
    15201 DECL_INLINE_THROW(uint32_t)
    15202 iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
    15203 {
    15204     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    15205     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
    15206 
    15207     uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
    15208                                                                           kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
    15209 
    15210     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    15211     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    15212 
    15213     off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
    15214 
    15215     /* Free but don't flush the source register. */
    15216     iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
    15217     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    15218 
    15219     return off;
    15220 }
    15221 
    15222 
    15223 #define IEM_MC_FETCH_XREG_U32(a_u64Value, a_iXReg, a_iDWord) \
    15224     off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u64Value, a_iXReg, a_iDWord)
    15225 
    15226 /** Emits code for IEM_MC_FETCH_XREG_U32. */
    15227 DECL_INLINE_THROW(uint32_t)
    15228 iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
    15229 {
    15230     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    15231     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
    15232 
    15233     uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
    15234                                                                           kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
    15235 
    15236     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    15237     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    15238 
    15239     off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
    15240 
    15241     /* Free but don't flush the source register. */
    15242     iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
    15243     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    15244 
    15245     return off;
    15246 }
    15247 
    15248 
    15249 #define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
    15250     off = iemNativeEmitSimdStoreXregU64(pReNative, off, a_iXReg, a_u64Value, a_iQWord)
    15251 
    15252 /** Emits code for IEM_MC_STORE_XREG_U64. */
    15253 DECL_INLINE_THROW(uint32_t)
    15254 iemNativeEmitSimdStoreXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar, uint8_t iQWord)
    15255 {
    15256     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    15257     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
    15258 
    15259     uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
    15260                                                                           kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
    15261 
    15262     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    15263 
    15264     off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQWord);
    15265     IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
    15266 
    15267     /* Free but don't flush the source register. */
    15268     iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    15269     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    15270 
    15271     return off;
    15272 }
    15273 
    15274 
    15275 #define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
    15276     off = iemNativeEmitSimdStoreXregU32(pReNative, off, a_iXReg, a_u32Value, a_iDWord)
    15277 
    15278 /** Emits code for IEM_MC_STORE_XREG_U32. */
    15279 DECL_INLINE_THROW(uint32_t)
    15280 iemNativeEmitSimdStoreXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar, uint8_t iDWord)
    15281 {
    15282     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    15283     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
    15284 
    15285     uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
    15286                                                                           kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
    15287 
    15288     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    15289 
    15290     off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iDWord);
    15291     IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
    15292 
    15293     /* Free but don't flush the source register. */
    15294     iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    15295     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    15296 
    15297     return off;
    15298 }
    15299 
    15300 
    15301 #define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
    15302     off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
    15303 
    15304 /** Emits code for IEM_MC_FETCH_YREG_U64. */
    15305 DECL_INLINE_THROW(uint32_t)
    15306 iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
    15307 {
    15308     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    15309     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
    15310 
    15311     uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
    15312                                                                             iQWord >= 2
    15313                                                                           ? kIemNativeGstSimdRegLdStSz_High128
    15314                                                                           : kIemNativeGstSimdRegLdStSz_Low128,
    15315                                                                           kIemNativeGstRegUse_ReadOnly);
    15316 
    15317     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    15318     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    15319 
    15320     off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
    15321 
    15322     /* Free but don't flush the source register. */
    15323     iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
    15324     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    15325 
    15326     return off;
    15327 }
    15328 
    15329 
    15330 #define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
    15331     off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
    15332 
    15333 /** Emits code for IEM_MC_FETCH_YREG_U32. */
    15334 DECL_INLINE_THROW(uint32_t)
    15335 iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
    15336 {
    15337     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    15338     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
    15339 
    15340     uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
    15341                                                                             iDWord >= 4
    15342                                                                           ? kIemNativeGstSimdRegLdStSz_High128
    15343                                                                           : kIemNativeGstSimdRegLdStSz_Low128,
    15344                                                                           kIemNativeGstRegUse_ReadOnly);
    15345 
    15346     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    15347     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    15348 
    15349     off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
    15350 
    15351     /* Free but don't flush the source register. */
    15352     iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
    15353     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    15354 
    15355     return off;
    15356 }
    15357 
    15358 
    15359 #define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
    15360     off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
    15361 
    15362 /** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
    15363 DECL_INLINE_THROW(uint32_t)
    15364 iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
    15365 {
    15366     uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
    15367                                                                        kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
    15368 
    15369     off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
    15370     IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
    15371 
    15372     /* Free but don't flush the register. */
    15373     iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
    15374 
    15375     return off;
    15376 }
    15377 
    15378 #endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
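
All of the SIMD emitters above follow the same four-step shape; the sketch below condenses it (the helper names are the ones used in this changeset, but the function itself is illustrative and not part of the source):

    /* Illustrative only: the shared shape of the SIMD fetch/store emitters above. */
    DECL_INLINE_THROW(uint32_t)
    iemNativeEmitSimdExampleStore(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxValVar)
    {
        /* 1) Shadow the guest SIMD register in a host register: fetches ask for
              kIemNativeGstRegUse_ReadOnly, stores for kIemNativeGstRegUse_ForUpdate. */
        uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
                                                                           kIemNativeGstSimdRegLdStSz_Low128,
                                                                           kIemNativeGstRegUse_ForUpdate);
        /* 2) Bind the MC variable to a host GPR (fetches force it to stack kind
              first via iemNativeVarSetKindToStack). */
        uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValVar, &off);

        /* 3) Emit the data movement; stores must also mark the written half dirty. */
        off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdReg, idxVarReg, 0 /*iDWord*/);
        IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);

        /* 4) Free the temporaries, deliberately without flushing the shadow copies. */
        iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
        iemNativeVarRegisterRelease(pReNative, idxValVar);
        return off;
    }
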
    15379 
    15380 
    15381 /*********************************************************************************************************************************
    15382 *   The native code generator functions for each MC block.                                                                       *
    15383 *********************************************************************************************************************************/
    15384 
    15385 /*
    15386  * Include instruction emitters.
    15387  */
    15388 #include "target-x86/IEMAllN8veEmit-x86.h"
    15389 
    15390 /*
    15391  * Include g_apfnIemNativeRecompileFunctions and associated functions.
    15392  *
    15393  * This should probably live in its own file later, but let's see what the
    15394  * compile times turn out to be first.
    15395  */
    15396 #include "IEMNativeFunctions.cpp.h"
    15397 
    15398 
    153998232
    154008233/*********************************************************************************************************************************
    154018234*   Recompiler Core.                                                                                                             *
    154028235*********************************************************************************************************************************/
    15403 
    154048236
    154058237/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
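
For orientation: the g_apfnIemNativeRecompileFunctions table pulled in by the include above is what the threaded-to-native translator consults for each call entry. A minimal sketch of that lookup, assuming the recompile-function signature (pReNative, off, pCallEntry); the fallback helper is declared in IEMN8veRecompiler.h below:

    /* Sketch only: how a recompiler loop might consult the generated table.
       The table and typedef names are real; the surrounding logic is a
       simplified assumption. */
    PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
    if (pfnRecom)
        off = pfnRecom(pReNative, off, pCallEntry);                  /* emit native code for this entry */
    else
        off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry); /* NULL slot: call the threaded function */
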
  • trunk/src/VBox/VMM/VMMAll/IEMAllThrdPython.py

    r103787 r103807  
    27782778    );
    27792779
    2780     def generateThreadedFunctionsHeader(self, oOut):
     2780    def generateThreadedFunctionsHeader(self, oOut, _):
    27812781        """
    27822782        Generates the threaded functions header file.
     
    28852885
    28862886    kasThreadedParamNames = ('uParam0', 'uParam1', 'uParam2');
    2887     def generateThreadedFunctionsSource(self, oOut):
     2887    def generateThreadedFunctionsSource(self, oOut, _):
    28882888        """
    28892889        Generates the threaded functions source file.
     
    30353035        return True;
    30363036
    3037     def generateNativeFunctionsHeader(self, oOut):
     3037    def generateNativeFunctionsHeader(self, oOut, _):
    30383038        """
    30393039        Generates the native recompiler functions header file.
     
    30733073                ];
    30743074
      3075        # Emit prototypes for the table functions.
     3076        asLines += [
     3077            '',
     3078            '#ifdef IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES'
     3079        ]
     3080        for sVariation in ThreadedFunctionVariation.kasVariationsEmitOrder:
     3081            sVarName = ThreadedFunctionVariation.kdVariationNames[sVariation];
     3082            asLines += [
     3083                '',
     3084                '/* Variation: ' + sVarName + ' */',
     3085            ];
     3086            for oThreadedFunction in self.aoThreadedFuncs:
     3087                oVariation = oThreadedFunction.dVariations.get(sVariation, None) # type: ThreadedFunctionVariation
     3088                if oVariation and oVariation.oNativeRecomp and oVariation.oNativeRecomp.isRecompilable():
     3089                    asLines.append('IEM_DECL_IEMNATIVERECOMPFUNC_PROTO(' + oVariation.getNativeFunctionName() + ');');
     3090        asLines += [
     3091            '',
     3092            '#endif /* IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES */',
     3093        ]
     3094
    30753095        oOut.write('\n'.join(asLines));
    30763096        return True;
    30773097
    3078     def generateNativeFunctionsSource(self, oOut):
     3098    def generateNativeFunctionsSource(self, oOut, idxPart):
    30793099        """
    30803100        Generates the native recompiler functions source file.
    30813101        Returns success indicator.
    30823102        """
     3103        cParts = 4;
     3104        assert(idxPart in range(cParts));
    30833105        if not self.oOptions.fNativeRecompilerEnabled:
    30843106            return True;
     
    30923114        # Emit the functions.
    30933115        #
    3094         for sVariation in ThreadedFunctionVariation.kasVariationsEmitOrder:
     3116        # The files are split up by threaded variation as that's the simplest way to
     3117        # do it, even if the distribution isn't entirely even (ksVariation_Default
     3118        # only has the defer to cimpl bits and the pre-386 variants will naturally
     3119        # have fewer instructions).
     3120        #
     3121        cVariationsPerFile = len(ThreadedFunctionVariation.kasVariationsEmitOrder) // cParts;
     3122        idxFirstVar        = idxPart * cVariationsPerFile;
     3123        idxEndVar          = idxFirstVar + cVariationsPerFile;
     3124        if idxPart + 1 >= cParts:
     3125            idxEndVar      = len(ThreadedFunctionVariation.kasVariationsEmitOrder);
     3126        for sVariation in ThreadedFunctionVariation.kasVariationsEmitOrder[idxFirstVar:idxEndVar]:
    30953127            sVarName = ThreadedFunctionVariation.kdVariationNames[sVariation];
    30963128            oOut.write(  '\n'
     
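
The slicing above hands each of the four output files a contiguous run of emit-order variations, with the last file absorbing the remainder. A worked example of the same arithmetic, written in C++ for illustration and assuming 10 variations (the real count comes from kasVariationsEmitOrder):

    unsigned const cVariations        = 10;                   /* assumed for the example */
    unsigned const cParts             = 4;
    unsigned const cVariationsPerFile = cVariations / cParts; /* = 2 */
    for (unsigned idxPart = 0; idxPart < cParts; idxPart++)
    {
        unsigned const idxFirstVar = idxPart * cVariationsPerFile; /* 0, 2, 4, 6 */
        unsigned       idxEndVar   = idxFirstVar + cVariationsPerFile;
        if (idxPart + 1 >= cParts)
            idxEndVar = cVariations;  /* last part gets the remainder: [6, 10) */
        /* part idxPart emits variations [idxFirstVar, idxEndVar) */
    }
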
    31163148                                     ' (macro expansion)' if oMcBlock.iBeginLine == oMcBlock.iEndLine else '')
    31173149                               + ' */\n'
    3118                                + 'static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(' + oVariation.getNativeFunctionName() + ')\n'
     3150                               + 'IEM_DECL_IEMNATIVERECOMPFUNC_DEF(' + oVariation.getNativeFunctionName() + ')\n'
    31193151                               + '{\n');
    31203152
     
    31313163
    31323164        #
    3133         # Output the function table.
    3134         #
    3135         oOut.write(   '\n'
    3136                     + '\n'
    3137                     + '/*\n'
    3138                     + ' * Function table running parallel to g_apfnIemThreadedFunctions and friends.\n'
    3139                     + ' */\n'
    3140                     + 'const PFNIEMNATIVERECOMPFUNC g_apfnIemNativeRecompileFunctions[kIemThreadedFunc_End] =\n'
    3141                     + '{\n'
    3142                     + '    /*Invalid*/ NULL,'
    3143                     + '\n'
    3144                     + '    /*\n'
    3145                     + '     * Predefined.\n'
    3146                     + '     */\n'
    3147                     );
    3148         for sFuncNm, _, fHaveRecompFunc in self.katBltIns:
    3149             if fHaveRecompFunc:
    3150                 oOut.write('    iemNativeRecompFunc_BltIn_%s,\n' % (sFuncNm,))
    3151             else:
    3152                 oOut.write('    NULL, /*BltIn_%s*/\n' % (sFuncNm,))
    3153 
    3154         iThreadedFunction = 1 + len(self.katBltIns);
    3155         for sVariation in ThreadedFunctionVariation.kasVariationsEmitOrder:
    3156             oOut.write(  '    /*\n'
    3157                        + '     * Variation: ' + ThreadedFunctionVariation.kdVariationNames[sVariation] + '\n'
    3158                        + '     */\n');
    3159             for oThreadedFunction in self.aoThreadedFuncs:
    3160                 oVariation = oThreadedFunction.dVariations.get(sVariation, None);
    3161                 if oVariation:
    3162                     iThreadedFunction += 1;
    3163                     assert oVariation.iEnumValue == iThreadedFunction;
    3164                     sName = oVariation.getNativeFunctionName();
    3165                     if oVariation.oNativeRecomp and oVariation.oNativeRecomp.isRecompilable():
    3166                         oOut.write('    /*%4u*/ %s,\n' % (iThreadedFunction, sName,));
    3167                     else:
    3168                         oOut.write('    /*%4u*/ NULL /*%s*/,\n' % (iThreadedFunction, sName,));
    3169 
    3170         oOut.write(  '};\n'
    3171                    + '\n');
     3165        # Output the function table if this is the first file.
     3166        #
     3167        if idxPart == 0:
     3168            oOut.write(   '\n'
     3169                        + '\n'
     3170                        + '/*\n'
     3171                        + ' * Function table running parallel to g_apfnIemThreadedFunctions and friends.\n'
     3172                        + ' */\n'
     3173                        + 'const PFNIEMNATIVERECOMPFUNC g_apfnIemNativeRecompileFunctions[kIemThreadedFunc_End] =\n'
     3174                        + '{\n'
     3175                        + '    /*Invalid*/ NULL,'
     3176                        + '\n'
     3177                        + '    /*\n'
     3178                        + '     * Predefined.\n'
     3179                        + '     */\n'
     3180                        );
     3181            for sFuncNm, _, fHaveRecompFunc in self.katBltIns:
     3182                if fHaveRecompFunc:
     3183                    oOut.write('    iemNativeRecompFunc_BltIn_%s,\n' % (sFuncNm,))
     3184                else:
     3185                    oOut.write('    NULL, /*BltIn_%s*/\n' % (sFuncNm,))
     3186
     3187            iThreadedFunction = 1 + len(self.katBltIns);
     3188            for sVariation in ThreadedFunctionVariation.kasVariationsEmitOrder:
     3189                oOut.write(  '    /*\n'
     3190                           + '     * Variation: ' + ThreadedFunctionVariation.kdVariationNames[sVariation] + '\n'
     3191                           + '     */\n');
     3192                for oThreadedFunction in self.aoThreadedFuncs:
     3193                    oVariation = oThreadedFunction.dVariations.get(sVariation, None);
     3194                    if oVariation:
     3195                        iThreadedFunction += 1;
     3196                        assert oVariation.iEnumValue == iThreadedFunction;
     3197                        sName = oVariation.getNativeFunctionName();
     3198                        if oVariation.oNativeRecomp and oVariation.oNativeRecomp.isRecompilable():
     3199                            oOut.write('    /*%4u*/ %s,\n' % (iThreadedFunction, sName,));
     3200                        else:
     3201                            oOut.write('    /*%4u*/ NULL /*%s*/,\n' % (iThreadedFunction, sName,));
     3202
     3203            oOut.write(  '};\n');
     3204
     3205        oOut.write('\n');
    31723206        return True;
    31733207
    3174     def generateNativeLivenessSource(self, oOut):
     3208    def generateNativeLivenessSource(self, oOut, _):
    31753209        """
    31763210        Generates the native recompiler liveness analysis functions source file.
     
    33733407        return True;
    33743408
    3375     def generateModifiedInput1(self, oOut):
    3376         """
    3377         Generates the combined modified input source/header file, part 1.
    3378         Returns success indicator.
    3379         """
    3380         return self.generateModifiedInput(oOut, 1);
    3381 
    3382     def generateModifiedInput2(self, oOut):
    3383         """
    3384         Generates the combined modified input source/header file, part 2.
    3385         Returns success indicator.
    3386         """
    3387         return self.generateModifiedInput(oOut, 2);
    3388 
    3389     def generateModifiedInput3(self, oOut):
    3390         """
    3391         Generates the combined modified input source/header file, part 3.
    3392         Returns success indicator.
    3393         """
    3394         return self.generateModifiedInput(oOut, 3);
    3395 
    3396     def generateModifiedInput4(self, oOut):
    3397         """
    3398         Generates the combined modified input source/header file, part 4.
    3399         Returns success indicator.
    3400         """
    3401         return self.generateModifiedInput(oOut, 4);
    3402 
    34033409
    34043410    #
     
    34483454                             default = '-',
    34493455                             help    = 'The output header file for the native recompiler functions.');
    3450         oParser.add_argument('--out-n8ve-funcs-cpp',
    3451                              metavar = 'file-n8tv-funcs.cpp',
    3452                              dest    = 'sOutFileN8veFuncsCpp',
     3456        oParser.add_argument('--out-n8ve-funcs-cpp1',
      3457                             metavar = 'file-n8ve-funcs1.cpp',
     3458                             dest    = 'sOutFileN8veFuncsCpp1',
    34533459                             action  = 'store',
    34543460                             default = '-',
    3455                              help    = 'The output C++ file for the native recompiler functions.');
     3461                             help    = 'The output C++ file for the native recompiler functions part 1.');
     3462        oParser.add_argument('--out-n8ve-funcs-cpp2',
     3463                             metavar = 'file-n8ve-funcs2.cpp',
     3464                             dest    = 'sOutFileN8veFuncsCpp2',
     3465                             action  = 'store',
     3466                             default = '-',
     3467                             help    = 'The output C++ file for the native recompiler functions part 2.');
     3468        oParser.add_argument('--out-n8ve-funcs-cpp3',
     3469                             metavar = 'file-n8ve-funcs3.cpp',
     3470                             dest    = 'sOutFileN8veFuncsCpp3',
     3471                             action  = 'store',
     3472                             default = '-',
     3473                             help    = 'The output C++ file for the native recompiler functions part 3.');
     3474        oParser.add_argument('--out-n8ve-funcs-cpp4',
     3475                             metavar = 'file-n8ve-funcs4.cpp',
     3476                             dest    = 'sOutFileN8veFuncsCpp4',
     3477                             action  = 'store',
     3478                             default = '-',
     3479                             help    = 'The output C++ file for the native recompiler functions part 4.');
    34563480        oParser.add_argument('--out-n8ve-liveness-cpp',
    3457                              metavar = 'file-n8tv-liveness.cpp',
     3481                             metavar = 'file-n8ve-liveness.cpp',
    34583482                             dest    = 'sOutFileN8veLivenessCpp',
    34593483                             action  = 'store',
     
    35123536            #
    35133537            aaoOutputFiles = (
    3514                  ( self.oOptions.sOutFileThrdFuncsHdr,      self.generateThreadedFunctionsHeader ),
    3515                  ( self.oOptions.sOutFileThrdFuncsCpp,      self.generateThreadedFunctionsSource ),
    3516                  ( self.oOptions.sOutFileN8veFuncsHdr,      self.generateNativeFunctionsHeader ),
    3517                  ( self.oOptions.sOutFileN8veFuncsCpp,      self.generateNativeFunctionsSource ),
    3518                  ( self.oOptions.sOutFileN8veLivenessCpp,   self.generateNativeLivenessSource ),
    3519                  ( self.oOptions.sOutFileModInput1,         self.generateModifiedInput1 ),
    3520                  ( self.oOptions.sOutFileModInput2,         self.generateModifiedInput2 ),
    3521                  ( self.oOptions.sOutFileModInput3,         self.generateModifiedInput3 ),
    3522                  ( self.oOptions.sOutFileModInput4,         self.generateModifiedInput4 ),
     3538                 ( self.oOptions.sOutFileThrdFuncsHdr,      self.generateThreadedFunctionsHeader, 0, ),
     3539                 ( self.oOptions.sOutFileThrdFuncsCpp,      self.generateThreadedFunctionsSource, 0, ),
     3540                 ( self.oOptions.sOutFileN8veFuncsHdr,      self.generateNativeFunctionsHeader,   0, ),
     3541                 ( self.oOptions.sOutFileN8veFuncsCpp1,     self.generateNativeFunctionsSource,   0, ),
     3542                 ( self.oOptions.sOutFileN8veFuncsCpp2,     self.generateNativeFunctionsSource,   1, ),
     3543                 ( self.oOptions.sOutFileN8veFuncsCpp3,     self.generateNativeFunctionsSource,   2, ),
     3544                 ( self.oOptions.sOutFileN8veFuncsCpp4,     self.generateNativeFunctionsSource,   3, ),
     3545                 ( self.oOptions.sOutFileN8veLivenessCpp,   self.generateNativeLivenessSource,    0, ),
     3546                 ( self.oOptions.sOutFileModInput1,         self.generateModifiedInput,           1, ),
     3547                 ( self.oOptions.sOutFileModInput2,         self.generateModifiedInput,           2, ),
     3548                 ( self.oOptions.sOutFileModInput3,         self.generateModifiedInput,           3, ),
     3549                 ( self.oOptions.sOutFileModInput4,         self.generateModifiedInput,           4, ),
    35233550            );
    35243551            fRc = True;
    3525             for sOutFile, fnGenMethod in aaoOutputFiles:
     3552            for sOutFile, fnGenMethod, iPartNo in aaoOutputFiles:
    35263553                if sOutFile == '-':
    35273554                    fRc = fnGenMethod(sys.stdout) and fRc;
     
    35323559                        print('error! Failed open "%s" for writing: %s' % (sOutFile, oXcpt,), file = sys.stderr);
    35333560                        return 1;
    3534                     fRc = fnGenMethod(oOut) and fRc;
     3561                    fRc = fnGenMethod(oOut, iPartNo) and fRc;
    35353562                    oOut.close();
    35363563            if fRc:
  • trunk/src/VBox/VMM/include/IEMN8veRecompiler.h

    r103804 r103807  
    796796/** @} */
    797797
    798 
    799798#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     799
    800800/**
    801801 * Guest registers that can be shadowed in host SIMD registers.
     
    832832    kIemNativeGstSimdRegLdStSz_End
    833833} IEMNATIVEGSTSIMDREGLDSTSZ;
    834 #endif
    835 
     834
     835#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
    836836
    837837/**
     
    13951395
    13961396
     1397#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
     1398DECL_HIDDEN_THROW(void)     iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
     1399DECL_HIDDEN_THROW(void)     iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
     1400                                                                 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX);
     1401# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     1402DECL_HIDDEN_THROW(void)     iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative,
     1403                                                                     IEMNATIVEGSTSIMDREG enmGstSimdReg,
     1404                                                                     uint8_t idxHstSimdReg = UINT8_MAX,
     1405                                                                     uint8_t idxHstSimdRegPrev = UINT8_MAX);
     1406# endif
     1407DECL_HIDDEN_THROW(void)     iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative,
     1408                                                               uint32_t offPc, uint32_t cInstrSkipped);
     1409#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
     1410
    13971411DECL_HIDDEN_THROW(uint32_t) iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
    13981412                                                 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0);
     
    14161430DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs);
    14171431DECL_HIDDEN_THROW(uint8_t)  iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
     1432DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
     1433                                                            uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    14181434DECLHIDDEN(void)            iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT;
    14191435DECLHIDDEN(void)            iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT;
     
    14211437DECLHIDDEN(void)            iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT;
    14221438DECLHIDDEN(void)            iemNativeRegFreeAndFlushMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegMask) RT_NOEXCEPT;
    1423 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExept = 0, bool fFlushShadows = true);
    14241439DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs,
    14251440                                                                  uint32_t fKeepVars = 0);
     
    14281443DECL_HIDDEN_THROW(uint32_t) iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    14291444                                                                          uint32_t fHstRegsActiveShadows);
    1430 
     1445#ifdef VBOX_STRICT
     1446DECLHIDDEN(void)            iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
     1447#endif
     1448DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off,
     1449                                                               uint64_t fGstShwExcept, bool fFlushShadows);
     1450#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
     1451DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off);
     1452#endif
     1453
     1454
     1455#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     1456DECL_HIDDEN_THROW(uint8_t)  iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile = true);
     1457DECL_HIDDEN_THROW(uint8_t)  iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
     1458                                                       bool fPreferVolatile = true);
     1459DECL_HIDDEN_THROW(uint8_t)  iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
     1460                                                                    IEMNATIVEGSTSIMDREG enmGstSimdReg,
     1461                                                                    IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz,
     1462                                                                    IEMNATIVEGSTREGUSE enmIntendedUse = kIemNativeGstRegUse_ReadOnly,
     1463                                                                    bool fNoVolatileRegs = false);
     1464DECLHIDDEN(void)            iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT;
     1465DECLHIDDEN(void)            iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT;
     1466DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
     1467                                                              IEMNATIVEGSTSIMDREG enmGstSimdReg);
     1468DECL_HIDDEN_THROW(uint32_t) iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
     1469                                                                         uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg,
     1470                                                                         IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz);
     1471#endif
     1472
     1473DECL_HIDDEN_THROW(uint8_t)  iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType);
     1474DECL_HIDDEN_THROW(uint8_t)  iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue);
     1475DECL_HIDDEN_THROW(uint8_t)  iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar);
     1476DECL_HIDDEN_THROW(uint8_t)  iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType);
     1477DECL_HIDDEN_THROW(uint8_t)  iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue);
     1478DECL_HIDDEN_THROW(void)     iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
     1479DECL_HIDDEN_THROW(void)     iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue);
     1480DECL_HIDDEN_THROW(void)     iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
     1481                                                           IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg);
    14311482DECL_HIDDEN_THROW(uint8_t)  iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
    14321483DECL_HIDDEN_THROW(uint8_t)  iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
     
    14381489DECL_HIDDEN_THROW(uint32_t) iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    14391490                                                                       uint32_t fHstRegsNotToSave);
     1491DECLHIDDEN(void)            iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
     1492DECLHIDDEN(void)            iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars);
    14401493
    14411494DECL_HIDDEN_THROW(uint32_t) iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    14421495                                                                 uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg);
     1496#ifdef VBOX_STRICT
     1497DECL_HIDDEN_THROW(uint32_t) iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg);
     1498DECL_HIDDEN_THROW(uint32_t) iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg,
     1499                                                            IEMNATIVEGSTREG enmGstReg);
     1500# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     1501DECL_HIDDEN_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
     1502                                                                IEMNATIVEGSTSIMDREG enmGstSimdReg,
     1503                                                                IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz);
     1504# endif
     1505DECL_HIDDEN_THROW(uint32_t) iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec);
     1506#endif
    14431507DECL_HIDDEN_THROW(uint32_t) iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr);
     1508DECL_HIDDEN_THROW(uint32_t) iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs);
    14441509DECL_HIDDEN_THROW(uint32_t) iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
    14451510                                                   uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
     
    14471512DECL_HIDDEN_THROW(uint32_t) iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    14481513                                                      PCIEMTHRDEDCALLENTRY pCallEntry);
    1449 
    1450 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    1451 DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile = true);
    1452 DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
    1453                                                       bool fPreferVolatile = true);
    1454 DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
    1455                                                                    IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse = kIemNativeGstRegUse_ReadOnly,
    1456                                                                    bool fNoVolatileRegs = false);
    1457 DECL_HIDDEN_THROW(uint32_t) iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    1458                                                                          uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg,
    1459                                                                          IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz);
    1460 #endif
    1461 
    1462 extern DECL_HIDDEN_DATA(const char * const) g_apszIemNativeHstRegNames[];
     1514DECL_HIDDEN_THROW(uint32_t) iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
     1515                                                                        uint8_t idxAddrReg, uint8_t idxInstr);
     1516DECL_HIDDEN_THROW(uint32_t) iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
     1517                                                                                  uint8_t idxAddrReg, uint8_t idxInstr);
     1518
     1519
     1520IEM_DECL_NATIVE_HLP_PROTO(int,      iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr));
     1521IEM_DECL_NATIVE_HLP_PROTO(int,      iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu));
     1522IEM_DECL_NATIVE_HLP_PROTO(int,      iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu));
     1523IEM_DECL_NATIVE_HLP_PROTO(int,      iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu));
     1524IEM_DECL_NATIVE_HLP_PROTO(int,      iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu));
     1525IEM_DECL_NATIVE_HLP_PROTO(int,      iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu));
     1526IEM_DECL_NATIVE_HLP_PROTO(int,      iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu));
     1527IEM_DECL_NATIVE_HLP_PROTO(int,      iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu));
     1528IEM_DECL_NATIVE_HLP_PROTO(int,      iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu));
     1529
     1530IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1531IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1532IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1533IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1534IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1535IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1536IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1537IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1538IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1539IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1540IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value));
     1541IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value));
     1542IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value));
     1543IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value));
     1544IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value));
     1545IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value));
     1546IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value));
     1547IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value));
     1548IEM_DECL_NATIVE_HLP_PROTO(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1549IEM_DECL_NATIVE_HLP_PROTO(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1550IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1551
     1552IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1553IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1554IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1555IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1556IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1557IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1558IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1559IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1560IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1561IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1562IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value));
     1563IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value));
     1564IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value));
     1565IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value));
     1566IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value));
     1567IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value));
     1568IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value));
     1569IEM_DECL_NATIVE_HLP_PROTO(void,     iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value));
     1570IEM_DECL_NATIVE_HLP_PROTO(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1571IEM_DECL_NATIVE_HLP_PROTO(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1572IEM_DECL_NATIVE_HLP_PROTO(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem));
     1573
     1574IEM_DECL_NATIVE_HLP_PROTO(uint8_t *,        iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1575IEM_DECL_NATIVE_HLP_PROTO(uint8_t *,        iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1576IEM_DECL_NATIVE_HLP_PROTO(uint8_t *,        iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1577IEM_DECL_NATIVE_HLP_PROTO(uint8_t const *,  iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1578IEM_DECL_NATIVE_HLP_PROTO(uint16_t *,       iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1579IEM_DECL_NATIVE_HLP_PROTO(uint16_t *,       iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1580IEM_DECL_NATIVE_HLP_PROTO(uint16_t *,       iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1581IEM_DECL_NATIVE_HLP_PROTO(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1582IEM_DECL_NATIVE_HLP_PROTO(uint32_t *,       iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1583IEM_DECL_NATIVE_HLP_PROTO(uint32_t *,       iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1584IEM_DECL_NATIVE_HLP_PROTO(uint32_t *,       iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1585IEM_DECL_NATIVE_HLP_PROTO(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1586IEM_DECL_NATIVE_HLP_PROTO(uint64_t *,       iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1587IEM_DECL_NATIVE_HLP_PROTO(uint64_t *,       iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1588IEM_DECL_NATIVE_HLP_PROTO(uint64_t *,       iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1589IEM_DECL_NATIVE_HLP_PROTO(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1590IEM_DECL_NATIVE_HLP_PROTO(RTFLOAT80U *,     iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1591IEM_DECL_NATIVE_HLP_PROTO(RTPBCD80U *,      iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1592IEM_DECL_NATIVE_HLP_PROTO(RTUINT128U *,     iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1593IEM_DECL_NATIVE_HLP_PROTO(RTUINT128U *,     iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1594IEM_DECL_NATIVE_HLP_PROTO(RTUINT128U *,     iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1595IEM_DECL_NATIVE_HLP_PROTO(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem, uint8_t iSegReg));
     1596
     1597IEM_DECL_NATIVE_HLP_PROTO(uint8_t *,        iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1598IEM_DECL_NATIVE_HLP_PROTO(uint8_t *,        iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1599IEM_DECL_NATIVE_HLP_PROTO(uint8_t *,        iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1600IEM_DECL_NATIVE_HLP_PROTO(uint8_t const *,  iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1601IEM_DECL_NATIVE_HLP_PROTO(uint16_t *,       iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1602IEM_DECL_NATIVE_HLP_PROTO(uint16_t *,       iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1603IEM_DECL_NATIVE_HLP_PROTO(uint16_t *,       iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1604IEM_DECL_NATIVE_HLP_PROTO(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1605IEM_DECL_NATIVE_HLP_PROTO(uint32_t *,       iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1606IEM_DECL_NATIVE_HLP_PROTO(uint32_t *,       iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1607IEM_DECL_NATIVE_HLP_PROTO(uint32_t *,       iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1608IEM_DECL_NATIVE_HLP_PROTO(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1609IEM_DECL_NATIVE_HLP_PROTO(uint64_t *,       iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1610IEM_DECL_NATIVE_HLP_PROTO(uint64_t *,       iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1611IEM_DECL_NATIVE_HLP_PROTO(uint64_t *,       iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1612IEM_DECL_NATIVE_HLP_PROTO(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1613IEM_DECL_NATIVE_HLP_PROTO(RTFLOAT80U *,     iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1614IEM_DECL_NATIVE_HLP_PROTO(RTPBCD80U *,      iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1615IEM_DECL_NATIVE_HLP_PROTO(RTUINT128U *,     iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1616IEM_DECL_NATIVE_HLP_PROTO(RTUINT128U *,     iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1617IEM_DECL_NATIVE_HLP_PROTO(RTUINT128U *,     iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1618IEM_DECL_NATIVE_HLP_PROTO(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem));
     1619
     1620IEM_DECL_NATIVE_HLP_PROTO(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo));
     1621IEM_DECL_NATIVE_HLP_PROTO(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo));
     1622IEM_DECL_NATIVE_HLP_PROTO(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo));
     1623IEM_DECL_NATIVE_HLP_PROTO(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo));
     1624
     1625
     1626/**
     1627 * Info about shadowed guest register values.
     1628 * @see IEMNATIVEGSTREG
     1629 */
     1630typedef struct IEMANTIVEGSTREGINFO
     1631{
     1632    /** Offset in VMCPU. */
     1633    uint32_t    off;
     1634    /** The field size. */
     1635    uint8_t     cb;
     1636    /** Name (for logging). */
     1637    const char *pszName;
     1638} IEMANTIVEGSTREGINFO;
     1639extern DECL_HIDDEN_DATA(IEMANTIVEGSTREGINFO const)  g_aGstShadowInfo[];
     1640extern DECL_HIDDEN_DATA(const char * const)         g_apszIemNativeHstRegNames[];
     1641extern DECL_HIDDEN_DATA(int32_t const)              g_aoffIemNativeCallStackArgBpDisp[];
     1642extern DECL_HIDDEN_DATA(uint32_t const)             g_afIemNativeCallRegs[];
     1643extern DECL_HIDDEN_DATA(uint8_t const)              g_aidxIemNativeCallRegs[];
     1644
    14631645
    14641646
     
    15791761}
    15801762
     1763
     1764/**
     1765 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
     1766 *
     1767 * @returns The flush mask.
     1768 * @param   fCImpl          The IEM_CIMPL_F_XXX flags.
     1769 * @param   fGstShwFlush    The starting flush mask.
     1770 */
     1771DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
     1772{
     1773    if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
     1774        fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_CS)
     1775                     |  RT_BIT_64(kIemNativeGstReg_SegBaseFirst  + X86_SREG_CS)
     1776                     |  RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
     1777    if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
     1778        fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
     1779                     |  RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_SS)
     1780                     |  RT_BIT_64(kIemNativeGstReg_SegBaseFirst  + X86_SREG_SS)
     1781                     |  RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
     1782    else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
     1783        fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
     1784    if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
     1785        fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
     1786    return fGstShwFlush;
     1787}
     1788
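
A minimal usage sketch of the helper above (the starting mask bit is chosen arbitrarily for the example):

    /* A far branch that also touches RFLAGS widens the starting flush mask: */
    uint64_t const fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_RFLAGS,
                                                                            RT_BIT_64(kIemNativeGstReg_Pc));
    /* The result keeps the starting PC bit and adds CS.sel/base/limit plus EFlags. */
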
     1789
     1790/** Number of hidden arguments for CIMPL calls.
      1791 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
     1792#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
     1793# define IEM_CIMPL_HIDDEN_ARGS 3
     1794#else
     1795# define IEM_CIMPL_HIDDEN_ARGS 2
     1796#endif
     1797
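
For readers wondering where those hidden arguments come from, the sketch below states the presumed mapping; it is an assumption based on the CImpl calling convention, not something this header spells out:

    /* Assumption for illustration only (FNEXAMPLECIMPL is a made-up name): */
    typedef VBOXSTRICTRC FNEXAMPLECIMPL(PVMCPUCC pVCpu, uint8_t cbInstr);
    /* pVCpu and cbInstr fill the two hidden argument slots; on win.amd64 with
       VBOXSTRICTRC_STRICT_ENABLED the VBOXSTRICTRC return value travels via a
       hidden buffer pointer as well, which accounts for the third slot. */
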
     1798
     1799#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
     1800
     1801# ifndef IEMLIVENESS_EXTENDED_LAYOUT
     1802/**
     1803 * Helper for iemNativeLivenessGetStateByGstReg.
     1804 *
     1805 * @returns IEMLIVENESS_STATE_XXX
     1806 * @param   fMergedStateExp2    This is the RT_BIT_32() of each sub-state
     1807 *                              ORed together.
     1808 */
     1809DECL_FORCE_INLINE(uint32_t)
     1810iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
     1811{
     1812    /* INPUT trumps anything else. */
     1813    if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
     1814        return IEMLIVENESS_STATE_INPUT;
     1815
     1816    /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
     1817    if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
     1818    {
     1819        /* If not all sub-fields are clobbered they must be considered INPUT. */
     1820        if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
     1821            return IEMLIVENESS_STATE_INPUT;
     1822        return IEMLIVENESS_STATE_CLOBBERED;
     1823    }
     1824
     1825    /* XCPT_OR_CALL trumps UNUSED. */
     1826    if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
     1827        return IEMLIVENESS_STATE_XCPT_OR_CALL;
     1828
     1829    return IEMLIVENESS_STATE_UNUSED;
     1830}
     1831# endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
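
A quick sanity example of the precedence just defined: a merged state where one sub-flag is CLOBBERED and another is UNUSED must collapse to INPUT, since the register as a whole is then only partially clobbered.

    uint32_t const fMerged = RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED)
                           | RT_BIT_32(IEMLIVENESS_STATE_UNUSED);
    Assert(iemNativeLivenessMergeExpandedEFlagsState(fMerged) == IEMLIVENESS_STATE_INPUT);
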
     1832
     1833
     1834DECL_FORCE_INLINE(uint32_t)
     1835iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
     1836{
     1837# ifndef IEMLIVENESS_EXTENDED_LAYOUT
     1838    return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
     1839         | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
     1840# else
     1841    return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx)       & 1)
     1842         | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
     1843         | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
      1844         | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 3) & 8);
     1845# endif
     1846}
     1847
     1848
     1849DECL_FORCE_INLINE(uint32_t)
     1850iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
     1851{
     1852    uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
     1853    if (enmGstReg == kIemNativeGstReg_EFlags)
     1854    {
     1855        /* Merge the eflags states to one. */
     1856# ifndef IEMLIVENESS_EXTENDED_LAYOUT
     1857        uRet  = RT_BIT_32(uRet);
     1858        uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
     1859        uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
     1860        uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
     1861        uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
     1862        uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
     1863        uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
     1864        uRet  = iemNativeLivenessMergeExpandedEFlagsState(uRet);
     1865# else
     1866        AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
     1867        uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
     1868        uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
     1869        uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
     1870        uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
     1871        uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
     1872        uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
     1873# endif
     1874    }
     1875    return uRet;
     1876}
     1877
     1878
     1879# ifdef VBOX_STRICT
      1880/** For assertions only; the caller must ensure that idxCurCall isn't zero. */
     1881DECL_FORCE_INLINE(uint32_t)
     1882iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
     1883{
     1884    return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
     1885}
     1886# endif /* VBOX_STRICT */
     1887
     1888#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
     1889
     1890
     1891/**
     1892 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
     1893 */
     1894DECL_FORCE_INLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
     1895{
     1896    if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
     1897        return IEM_CIMPL_HIDDEN_ARGS;
     1898    if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
     1899        return 1;
     1900    return 0;
     1901}
     1902
     1903
     1904DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
     1905                                                     IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
     1906{
     1907    pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
     1908
     1909    pReNative->Core.aHstRegs[idxReg].enmWhat        = enmWhat;
     1910    pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
     1911    pReNative->Core.aHstRegs[idxReg].idxVar         = idxVar;
     1912    return (uint8_t)idxReg;
     1913}
     1914
     1915
     1916
     1917/*********************************************************************************************************************************
     1918*   Register Allocator (GPR)                                                                                                     *
     1919*********************************************************************************************************************************/
     1920
     1921/**
     1922 * Marks host register @a idxHstReg as containing a shadow copy of guest
     1923 * register @a enmGstReg.
     1924 *
     1925 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
     1926 * host register before calling.
     1927 */
     1928DECL_FORCE_INLINE(void)
     1929iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
     1930{
     1931    Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
     1932    Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
     1933    Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
     1934
     1935    pReNative->Core.aidxGstRegShadows[enmGstReg]       = idxHstReg;
     1936    pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
     1937    pReNative->Core.bmGstRegShadows                   |= RT_BIT_64(enmGstReg);
     1938    pReNative->Core.bmHstRegsWithGstShadow            |= RT_BIT_32(idxHstReg);
     1939#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
     1940    iemNativeDbgInfoAddNativeOffset(pReNative, off);
     1941    iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
     1942#else
     1943    RT_NOREF(off);
     1944#endif
     1945}
     1946
     1947
     1948/**
     1949 * Clear any guest register shadow claims from @a idxHstReg.
     1950 *
     1951 * The register does not need to be shadowing any guest registers.
     1952 */
     1953DECL_FORCE_INLINE(void)
     1954iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
     1955{
     1956    Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
     1957              == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
     1958           && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
     1959    Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
     1960           == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
     1961
     1962#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
     1963    uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
     1964    if (fGstRegs)
     1965    {
     1966        Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
     1967        iemNativeDbgInfoAddNativeOffset(pReNative, off);
     1968        while (fGstRegs)
     1969        {
     1970            unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
     1971            fGstRegs &= ~RT_BIT_64(iGstReg);
     1972            iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
     1973        }
     1974    }
     1975#else
     1976    RT_NOREF(off);
     1977#endif
     1978
     1979    pReNative->Core.bmHstRegsWithGstShadow            &= ~RT_BIT_32(idxHstReg);
     1980    pReNative->Core.bmGstRegShadows                   &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
     1981    pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
     1982}
     1983
     1984
     1985/**
     1986 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
     1987 * and global overview flags.
     1988 */
     1989DECL_FORCE_INLINE(void)
     1990iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
     1991{
     1992    Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
     1993    Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
     1994              == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
     1995           && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
     1996    Assert(pReNative->Core.bmGstRegShadows                    & RT_BIT_64(enmGstReg));
     1997    Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
     1998    Assert(pReNative->Core.bmHstRegsWithGstShadow             & RT_BIT_32(idxHstReg));
     1999
     2000#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
     2001    iemNativeDbgInfoAddNativeOffset(pReNative, off);
     2002    iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
     2003#else
     2004    RT_NOREF(off);
     2005#endif
     2006
     2007    uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
     2008    pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
     2009    if (!fGstRegShadowsNew)
     2010        pReNative->Core.bmHstRegsWithGstShadow        &= ~RT_BIT_32(idxHstReg);
     2011    pReNative->Core.bmGstRegShadows                   &= ~RT_BIT_64(enmGstReg);
     2012}
     2013
     2014
     2015#if 0 /* unused */
     2016/**
     2017 * Clear any guest register shadow claim for @a enmGstReg.
     2018 */
     2019DECL_FORCE_INLINE(void)
     2020iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
     2021{
     2022    Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
     2023    if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
     2024    {
     2025        Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
     2026        iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
     2027    }
     2028}
     2029#endif
     2030
     2031
     2032/**
     2033 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
     2034 * as the new shadow of it.
     2035 *
      2036 * Unlike the other guest reg shadow helpers, this does the logging for you.
      2037 * However, the liveness state is not asserted here; the caller must do
      2038 * that.
     2039 */
     2040DECL_FORCE_INLINE(void)
     2041iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
     2042                                       IEMNATIVEGSTREG enmGstReg, uint32_t off)
     2043{
     2044    Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
     2045    if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
     2046    {
     2047        uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
     2048        Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
     2049        if (idxHstRegOld == idxHstRegNew)
     2050            return;
     2051        Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
     2052               g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
     2053        iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
     2054    }
     2055    else
     2056        Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
     2057               g_aGstShadowInfo[enmGstReg].pszName));
     2058    iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
     2059}
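
Continuing the simplified model from above, the clear-and-mark flow looks like this (a sketch that inlines the clearing step iemNativeRegClearGstRegShadowingOne performs, minus logging and debug info):

    /* Move the shadow of guest register iGstReg to host register idxHstRegNew,
       dropping any previous claim first. */
    static void myClearAndMarkShadow(MYSHADOWSTATE *pState, uint8_t idxHstRegNew, unsigned iGstReg)
    {
        if (pState->bmGstRegShadows & ((uint64_t)1 << iGstReg))
        {
            uint8_t const idxHstRegOld = pState->aidxGstRegShadows[iGstReg];
            if (idxHstRegOld == idxHstRegNew)
                return;                                  /* already the shadow, nothing to do */
            pState->afGstRegShadows[idxHstRegOld] &= ~((uint64_t)1 << iGstReg);
            if (!pState->afGstRegShadows[idxHstRegOld])  /* last shadow gone? */
                pState->bmHstRegsWithGstShadow &= ~((uint32_t)1 << idxHstRegOld);
            pState->bmGstRegShadows &= ~((uint64_t)1 << iGstReg);
        }
        myMarkShadow(pState, idxHstRegNew, iGstReg);     /* record the new claim */
    }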
     2060
     2061
     2062/**
     2063 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
     2064 * to @a idxRegTo.
     2065 */
     2066DECL_FORCE_INLINE(void)
     2067iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
     2068                                    IEMNATIVEGSTREG enmGstReg, uint32_t off)
     2069{
     2070    Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
     2071    Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
     2072    Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
     2073              == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
     2074           && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
     2075    Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
     2076           == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
     2077    Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
     2078           == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
     2079
     2080    uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
     2081    pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows  = fGstRegShadowsFrom;
     2082    if (!fGstRegShadowsFrom)
     2083        pReNative->Core.bmHstRegsWithGstShadow          &= ~RT_BIT_32(idxRegFrom);
     2084    pReNative->Core.bmHstRegsWithGstShadow              |= RT_BIT_32(idxRegTo);
     2085    pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows   |= RT_BIT_64(enmGstReg);
     2086    pReNative->Core.aidxGstRegShadows[enmGstReg]         = idxRegTo;
     2087#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
     2088    iemNativeDbgInfoAddNativeOffset(pReNative, off);
     2089    iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
     2090#else
     2091    RT_NOREF(off);
     2092#endif
     2093}
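
A worked bit-level trace of the transfer, again in the simplified model: host register 3 shadows guest registers 2 and 5, and guest register 2 is handed over to host register 7. Note that bmGstRegShadows is untouched, since the guest register stays shadowed, just elsewhere:

    static void myTransferExample(void)
    {
        MYSHADOWSTATE State = {0};
        myMarkShadow(&State, 3 /*idxHstReg*/, 2 /*iGstReg*/);
        myMarkShadow(&State, 3 /*idxHstReg*/, 5 /*iGstReg*/);
        assert(State.afGstRegShadows[3] == 0x24);          /* bits 2 and 5 */
        assert(State.bmGstRegShadows    == 0x24);

        /* Transfer guest register 2 from host register 3 to host register 7,
           following the steps of iemNativeRegTransferGstRegShadowing: */
        State.afGstRegShadows[3]     &= ~(uint64_t)0x04;   /* reg 3 still shadows guest reg 5, */
        /* ...so bit 3 stays set in bmHstRegsWithGstShadow (no clearing needed). */
        State.bmHstRegsWithGstShadow |= (uint32_t)1 << 7;
        State.afGstRegShadows[7]     |= (uint64_t)0x04;
        State.aidxGstRegShadows[2]    = 7;

        assert(State.afGstRegShadows[3] == 0x20);
        assert(State.afGstRegShadows[7] == 0x04);
        assert(State.bmGstRegShadows    == 0x24);          /* unchanged */
        assert(State.bmHstRegsWithGstShadow == ((1u << 3) | (1u << 7)));
    }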
     2094
     2095
     2096/**
     2097 * Flushes any delayed guest register writes.
     2098 *
     2099 * This must be called prior to calling CImpl functions and any helpers that use
     2100 * the guest state (like raising exceptions) and such.
     2101 *
      2102 * So far only RIP updates are delayed (see IEMNATIVE_WITH_DELAYED_PC_UPDATING),
      2103 * these being the most common writes and therefore the first target.
     2104 */
     2105DECL_INLINE_THROW(uint32_t)
     2106iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept = 0, bool fFlushShadows = true)
     2107{
     2108#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
      2109    if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
     2110        return iemNativeRegFlushPendingWritesSlow(pReNative, off, fGstShwExcept, fFlushShadows);
     2111#else
     2112    RT_NOREF(pReNative, fGstShwExcept);
     2113#endif
     2114
     2115#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     2116    /** @todo r=bird: There must be a quicker way to check if anything needs doing here!  */
     2117    /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
     2118    return iemNativeRegFlushPendingWritesSlow(pReNative, off, fGstShwExcept, fFlushShadows);
     2119#else
     2120    RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
     2121    return off;
     2122#endif
     2123}
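
A hypothetical caller sketch showing where this fits in (the emitter name and the call-emission step are made up; only iemNativeRegFlushPendingWrites is from the source, and the recompiler headers are assumed to be included):

    /* Hypothetical - any path about to call a C helper that reads the guest
       state must flush first so CPUMCTX is consistent when the helper runs. */
    DECL_INLINE_THROW(uint32_t)
    myEmitCallGstStateHelper(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    {
        /* Make all delayed guest register writes (e.g. the accumulated RIP
           offset) visible in the guest context. */
        off = iemNativeRegFlushPendingWrites(pReNative, off);

        /* ... emit the actual helper call here ... */
        return off;
    }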
     2124
     2125
     2126
     2127/*********************************************************************************************************************************
     2128*   SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge)                            *
     2129*********************************************************************************************************************************/
     2130
     2131#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     2132
     2133DECL_FORCE_INLINE(uint8_t)
     2134iemNativeSimdRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdReg,
     2135                              IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
     2136{
     2137    pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
     2138
     2139    pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat        = enmWhat;
     2140    pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
     2141    RT_NOREF(idxVar);
     2142    return idxSimdReg;
     2143}
     2144
     2145
     2146/**
     2147 * Marks host SIMD register @a idxHstSimdReg as containing a shadow copy of guest
     2148 * SIMD register @a enmGstSimdReg.
     2149 *
     2150 * ASSUMES that caller has made sure @a enmGstSimdReg is not associated with any
     2151 * host register before calling.
     2152 */
     2153DECL_FORCE_INLINE(void)
     2154iemNativeSimdRegMarkAsGstSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
     2155                                       IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
     2156{
     2157    Assert(!(pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg)));
     2158    Assert(!pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
     2159    Assert((unsigned)enmGstSimdReg < (unsigned)kIemNativeGstSimdReg_End);
     2160
     2161    pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg]       = idxHstSimdReg;
     2162    pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
     2163    pReNative->Core.bmGstSimdRegShadows                        |= RT_BIT_64(enmGstSimdReg);
     2164    pReNative->Core.bmHstSimdRegsWithGstShadow                 |= RT_BIT_32(idxHstSimdReg);
     2165#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
     2166    iemNativeDbgInfoAddNativeOffset(pReNative, off);
     2167    iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxHstSimdReg);
     2168#else
     2169    RT_NOREF(off);
     2170#endif
     2171}
     2172
     2173
     2174/**
     2175 * Transfers the guest SIMD register shadow claims of @a enmGstSimdReg from @a idxSimdRegFrom
     2176 * to @a idxSimdRegTo.
     2177 */
     2178DECL_FORCE_INLINE(void)
     2179iemNativeSimdRegTransferGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdRegFrom, uint8_t idxSimdRegTo,
     2180                                            IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
     2181{
     2182    Assert(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
     2183    Assert(pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] == idxSimdRegFrom);
     2184    Assert(      (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows)
     2185              == pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows
      2186           && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
     2187    Assert(   (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows)
     2188           == pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows);
     2189    Assert(   RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdRegFrom))
     2190           == RT_BOOL(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows));
     2191    Assert(   pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded
     2192           == pReNative->Core.aHstSimdRegs[idxSimdRegTo].enmLoaded);
     2193
     2194    uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstSimdReg);
     2195    pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows  = fGstRegShadowsFrom;
     2196    if (!fGstRegShadowsFrom)
     2197    {
     2198        pReNative->Core.bmHstSimdRegsWithGstShadow               &= ~RT_BIT_32(idxSimdRegFrom);
     2199        pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded    = kIemNativeGstSimdRegLdStSz_Invalid;
     2200    }
     2201    pReNative->Core.bmHstSimdRegsWithGstShadow                |= RT_BIT_32(idxSimdRegTo);
     2202    pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
     2203    pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg]       = idxSimdRegTo;
     2204#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
     2205    iemNativeDbgInfoAddNativeOffset(pReNative, off);
     2206    iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxSimdRegTo, idxSimdRegFrom);
     2207#else
     2208    RT_NOREF(off);
     2209#endif
     2210}
     2211
     2212
     2213/**
     2214 * Clear any guest register shadow claims from @a idxHstSimdReg.
     2215 *
     2216 * The register does not need to be shadowing any guest registers.
     2217 */
     2218DECL_FORCE_INLINE(void)
     2219iemNativeSimdRegClearGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, uint32_t off)
     2220{
     2221    Assert(      (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
     2222              == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows
     2223           && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
     2224    Assert(   RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstSimdReg))
     2225           == RT_BOOL(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
     2226    Assert(   !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyLo128)
     2227           && !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyHi128));
     2228
     2229#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
     2230    uint64_t fGstRegs = pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
     2231    if (fGstRegs)
     2232    {
     2233        Assert(fGstRegs < RT_BIT_64(kIemNativeGstSimdReg_End));
     2234        iemNativeDbgInfoAddNativeOffset(pReNative, off);
     2235        while (fGstRegs)
     2236        {
     2237            unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
     2238            fGstRegs &= ~RT_BIT_64(iGstReg);
     2239            iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, (IEMNATIVEGSTSIMDREG)iGstReg, UINT8_MAX, idxHstSimdReg);
     2240        }
     2241    }
     2242#else
     2243    RT_NOREF(off);
     2244#endif
     2245
     2246    pReNative->Core.bmHstSimdRegsWithGstShadow        &= ~RT_BIT_32(idxHstSimdReg);
     2247    pReNative->Core.bmGstSimdRegShadows               &= ~pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
     2248    pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
     2249    pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded      = kIemNativeGstSimdRegLdStSz_Invalid;
     2250}
     2251
     2252#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
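
One difference from the GPR allocator worth illustrating: each host SIMD register also records how much of the guest register it currently holds (enmLoaded), and clearing the shadow claims resets that to invalid. A simplified model of why (MYSIMDLOADSZ and MYSIMDREG are illustrative stand-ins; the real type behind enmLoaded uses the kIemNativeGstSimdRegLdStSz_* values seen above):

    #include <stdint.h>

    typedef enum MYSIMDLOADSZ
    {
        MYSIMD_INVALID = 0,     /* host register holds no valid guest content */
        MYSIMD_LOW128,          /* low 128 bits of the guest register loaded */
        MYSIMD_HIGH128,         /* high 128 bits loaded */
        MYSIMD_FULL256          /* all 256 bits loaded */
    } MYSIMDLOADSZ;

    typedef struct MYSIMDREG
    {
        uint64_t     fGstRegShadows;    /* guest SIMD registers shadowed here */
        MYSIMDLOADSZ enmLoaded;         /* how much of the value is valid */
    } MYSIMDREG;

    /* Clearing all shadow claims must also invalidate enmLoaded; otherwise a
       later allocation could mistake stale host register contents for loaded
       guest state - the same reason iemNativeSimdRegClearGstSimdRegShadowing
       resets it to kIemNativeGstSimdRegLdStSz_Invalid. */
    static void myClearSimdShadows(MYSIMDREG *pReg)
    {
        pReg->fGstRegShadows = 0;
        pReg->enmLoaded      = MYSIMD_INVALID;
    }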
     2253
     2254
     2255#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
     2256/**
     2257 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
     2258 */
     2259DECL_INLINE_THROW(uint32_t) iemNativeEmitPcWriteback(PIEMRECOMPILERSTATE pReNative, uint32_t off)
     2260{
     2261    if (pReNative->Core.offPc)
     2262        return iemNativeEmitPcWritebackSlow(pReNative, off);
     2263    return off;
     2264}
     2265#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING  */
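
A simplified model of the delayed PC update scheme the offPc check above relies on: instead of emitting a RIP update for every recompiled instruction, the recompiler accumulates the byte offset and materializes the sum in a single writeback. This is a sketch under reduced types (MYPCSTATE and both helpers are illustrative); the real slow path emits a native add instruction rather than performing the addition itself:

    #include <stdint.h>

    typedef struct MYPCSTATE
    {
        uint64_t uGstRip;   /* last RIP value actually written to the guest context */
        uint32_t offPc;     /* bytes advanced since that write */
    } MYPCSTATE;

    /* Called per recompiled instruction: no native code is emitted for this. */
    static void myPcAdvance(MYPCSTATE *pState, uint8_t cbInstr)
    {
        pState->offPc += cbInstr;
    }

    /* Called before anything that must see the true RIP; mirrors the offPc
       check in iemNativeEmitPcWriteback above. */
    static void myPcWriteback(MYPCSTATE *pState)
    {
        if (pState->offPc)
        {
            pState->uGstRip += pState->offPc;
            pState->offPc    = 0;
        }
    }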
     2266
     2267
    15812268/** @} */
    15822269