VirtualBox

Timestamp:
Mar 1, 2024 3:39:08 PM
Author:
vboxsync
Message:

VMM/IEM: Moved iemNativeEmit_xor_r_r_efl and iemNativeEmitEFlagsForLogical into a separate file under the new target-x86 subdir. bugref:10376

Location:
trunk/src/VBox/VMM/VMMAll/target-x86
Files:
1 added
1 copied

  • trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h

--- r103635
+++ r103637

 /* $Id$ */
 /** @file
- * IEM - Native Recompiler
- *
- * Logging group IEM_RE_NATIVE assignments:
- *      - Level 1  (Log)  : ...
- *      - Flow  (LogFlow) : ...
- *      - Level 2  (Log2) : Details calls as they're recompiled.
- *      - Level 3  (Log3) : Disassemble native code after recompiling.
- *      - Level 4  (Log4) : ...
- *      - Level 5  (Log5) : ...
- *      - Level 6  (Log6) : ...
- *      - Level 7  (Log7) : ...
- *      - Level 8  (Log8) : ...
- *      - Level 9  (Log9) : ...
- *      - Level 10 (Log10): ...
- *      - Level 11 (Log11): Variable allocator.
- *      - Level 12 (Log12): Register allocator.
+ * IEM - Native Recompiler, x86 Target - Code Emitters.
  */

 /*
- * Copyright (C) 2023 Oracle and/or its affiliates.
+ * Copyright (C) 2023-2024 Oracle and/or its affiliates.
  *
  * This file is part of VirtualBox base platform packages, as
[...]
  */

-
-/*********************************************************************************************************************************
-*   Header Files                                                                                                                 *
-*********************************************************************************************************************************/
-#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
-#define IEM_WITH_OPAQUE_DECODER_STATE
-#define VMCPU_INCL_CPUM_GST_CTX
-#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
-#include <VBox/vmm/iem.h>
-#include <VBox/vmm/cpum.h>
-#include <VBox/vmm/dbgf.h>
-#include "IEMInternal.h"
-#include <VBox/vmm/vmcc.h>
-#include <VBox/log.h>
-#include <VBox/err.h>
-#include <VBox/dis.h>
-#include <VBox/param.h>
-#include <iprt/assert.h>
-#include <iprt/heap.h>
-#include <iprt/mem.h>
-#include <iprt/string.h>
-#if   defined(RT_ARCH_AMD64)
-# include <iprt/x86.h>
-#elif defined(RT_ARCH_ARM64)
-# include <iprt/armv8.h>
+#ifndef VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
+#define VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
 #endif
    69 
    70 #ifdef RT_OS_WINDOWS
    71 # include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
    72 extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
    73 extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
    74 #else
    75 # include <iprt/formats/dwarf.h>
    76 # if defined(RT_OS_DARWIN)
    77 #  include <libkern/OSCacheControl.h>
    78 #  define IEMNATIVE_USE_LIBUNWIND
    79 extern "C" void  __register_frame(const void *pvFde);
    80 extern "C" void  __deregister_frame(const void *pvFde);
    81 # else
    82 #  ifdef DEBUG_bird /** @todo not thread safe yet */
    83 #   define IEMNATIVE_USE_GDB_JIT
    84 #  endif
    85 #  ifdef IEMNATIVE_USE_GDB_JIT
    86 #   include <iprt/critsect.h>
    87 #   include <iprt/once.h>
    88 #   include <iprt/formats/elf64.h>
    89 #  endif
    90 extern "C" void  __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
    91 extern "C" void *__deregister_frame_info(void *pvBegin);           /* (returns pvObj from __register_frame_info call) */
    92 # endif
    93 #endif
    94 #ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    95 # include "/opt/local/include/capstone/capstone.h"
    96 #endif
    97 
    98 #include "IEMInline.h"
    99 #include "IEMThreadedFunctions.h"
    100 #include "IEMN8veRecompiler.h"
    101 #include "IEMN8veRecompilerEmit.h"
    102 #include "IEMN8veRecompilerTlbLookup.h"
    103 #include "IEMNativeFunctions.h"
    104 
    105 
    106 /*
    107  * Narrow down configs here to avoid wasting time on unused configs.
    108  * Note! Same checks in IEMAllThrdRecompiler.cpp.
    109  */
    110 
    111 #ifndef IEM_WITH_CODE_TLB
    112 # error The code TLB must be enabled for the recompiler.
    113 #endif
    114 
    115 #ifndef IEM_WITH_DATA_TLB
    116 # error The data TLB must be enabled for the recompiler.
    117 #endif
    118 
    119 #ifndef IEM_WITH_SETJMP
    120 # error The setjmp approach must be enabled for the recompiler.
    121 #endif
    122 
    123 /** @todo eliminate this clang build hack. */
    124 #if RT_CLANG_PREREQ(4, 0)
    125 # pragma GCC diagnostic ignored "-Wunused-function"
    126 #endif
    127 
    128 
    129 /*********************************************************************************************************************************
    130 *   Internal Functions                                                                                                           *
    131 *********************************************************************************************************************************/
    132 #ifdef VBOX_STRICT
    133 static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    134                                                 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
    135 static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
    136 #endif
    137 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    138 static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
    139 static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
    140 #endif
    141 DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
    142 DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
    143                                                             IEMNATIVEGSTREG enmGstReg, uint32_t off);
    144 DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
    145 
    146 
    147 /*********************************************************************************************************************************
    148 *   Executable Memory Allocator                                                                                                  *
    149 *********************************************************************************************************************************/
    150 /** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    151  * Use an alternative chunk sub-allocator that does not store internal data
    152  * in the chunk.
    153  *
    154  * Using RTHeapSimple is not practical on newer darwin systems where
    155  * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
    156  * memory.  We would have to change the protection of the whole chunk for
    157  * every call to RTHeapSimple, which would be rather expensive.
    158  *
    159  * This alternative implementation lets us restrict page protection modifications
    160  * to the pages backing the executable memory we just allocated.
    161  */
    162 #define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    163 /** The chunk sub-allocation unit size in bytes. */
    164 #define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE      128
    165 /** The chunk sub-allocation unit size as a shift factor. */
    166 #define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT     7
    167 
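For illustration, the unit size and shift above pair up in the usual round-up-and-shift way; a standalone sketch of the request-to-units conversion the allocator performs further down (not part of the changeset):

    #include <stdint.h>

    #define UNIT_SIZE  128  /* mirrors IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE */
    #define UNIT_SHIFT 7    /* mirrors IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT */

    /* Rounds a byte request up to whole 128-byte sub-allocation units. */
    static uint32_t exampleReqUnits(uint32_t cbReq)
    {
        return (cbReq + UNIT_SIZE - 1) >> UNIT_SHIFT;  /* e.g. 1 -> 1 unit, 129 -> 2 units */
    }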
    168 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    169 # ifdef IEMNATIVE_USE_GDB_JIT
    170 #   define IEMNATIVE_USE_GDB_JIT_ET_DYN
    171 
    172 /** GDB JIT: Code entry.   */
    173 typedef struct GDBJITCODEENTRY
    174 {
    175     struct GDBJITCODEENTRY *pNext;
    176     struct GDBJITCODEENTRY *pPrev;
    177     uint8_t                *pbSymFile;
    178     uint64_t                cbSymFile;
    179 } GDBJITCODEENTRY;
    180 
    181 /** GDB JIT: Actions. */
    182 typedef enum GDBJITACTIONS : uint32_t
    183 {
    184     kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
    185 } GDBJITACTIONS;
    186 
    187 /** GDB JIT: Descriptor. */
    188 typedef struct GDBJITDESCRIPTOR
    189 {
    190     uint32_t            uVersion;
    191     GDBJITACTIONS       enmAction;
    192     GDBJITCODEENTRY    *pRelevant;
    193     GDBJITCODEENTRY    *pHead;
    194     /** Our addition: */
    195     GDBJITCODEENTRY    *pTail;
    196 } GDBJITDESCRIPTOR;
    197 
    198 /** GDB JIT: Our simple symbol file data. */
    199 typedef struct GDBJITSYMFILE
    200 {
    201     Elf64_Ehdr          EHdr;
    202 #  ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
    203     Elf64_Shdr          aShdrs[5];
    204 #  else
    205     Elf64_Shdr          aShdrs[7];
    206     Elf64_Phdr          aPhdrs[2];
    207 #  endif
    208     /** The dwarf ehframe data for the chunk. */
    209     uint8_t             abEhFrame[512];
    210     char                szzStrTab[128];
    211     Elf64_Sym           aSymbols[3];
    212 #  ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    213     Elf64_Sym           aDynSyms[2];
    214     Elf64_Dyn           aDyn[6];
    215 #  endif
    216 } GDBJITSYMFILE;
    217 
    218 extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
    219 extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
    220 
    221 /** Init once for g_IemNativeGdbJitLock. */
    222 static RTONCE     g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
    223 /** The GDB JIT lock (critical section). */
    224 static RTCRITSECT g_IemNativeGdbJitLock;
    225 
    226 /** GDB reads the info here. */
    227 GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
    228 
    229 /** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
    230 DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
    231 {
    232     ASMNopPause();
    233 }
    234 
    235 /** @callback_method_impl{FNRTONCE} */
    236 static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
    237 {
    238     RT_NOREF(pvUser);
    239     return RTCritSectInit(&g_IemNativeGdbJitLock);
    240 }
    241 
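For orientation: the structures above follow the standard GDB JIT interface, where the JIT links a code entry into the descriptor's list, points pRelevant at it, sets the action, and calls the magic breakpoint function. A hypothetical registration helper using the types above might look like this (illustrative sketch, not the actual VirtualBox code; pTail maintenance omitted):

    static void exampleGdbJitRegister(GDBJITCODEENTRY *pEntry, uint8_t *pbSymFile, uint64_t cbSymFile)
    {
        pEntry->pbSymFile = pbSymFile;
        pEntry->cbSymFile = cbSymFile;
        pEntry->pPrev     = NULL;
        pEntry->pNext     = __jit_debug_descriptor.pHead;       /* push onto the list head */
        if (__jit_debug_descriptor.pHead)
            __jit_debug_descriptor.pHead->pPrev = pEntry;
        __jit_debug_descriptor.pHead     = pEntry;
        __jit_debug_descriptor.pRelevant = pEntry;              /* the entry being acted upon */
        __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
        __jit_debug_register_code();                            /* GDB has a breakpoint here */
    }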
    242 
    243 # endif /* IEMNATIVE_USE_GDB_JIT */
    244 
    245 /**
    246  * Per-chunk unwind info for non-windows hosts.
    247  */
    248 typedef struct IEMEXECMEMCHUNKEHFRAME
    249 {
    250 # ifdef IEMNATIVE_USE_LIBUNWIND
    251     /** The offset of the FDA into abEhFrame. */
    252     uintptr_t               offFda;
    253 # else
    254     /** 'struct object' storage area. */
    255     uint8_t                 abObject[1024];
    256 # endif
    257 #  ifdef IEMNATIVE_USE_GDB_JIT
    258 #   if 0
    259     /** The GDB JIT 'symbol file' data. */
    260     GDBJITSYMFILE           GdbJitSymFile;
    261 #   endif
    262     /** The GDB JIT list entry. */
    263     GDBJITCODEENTRY         GdbJitEntry;
    264 #  endif
    265     /** The dwarf ehframe data for the chunk. */
    266     uint8_t                 abEhFrame[512];
    267 } IEMEXECMEMCHUNKEHFRAME;
    268 /** Pointer to the per-chunk unwind info for non-windows hosts. */
    269 typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
    270 #endif
    271 
    272 
    273 /**
    274  * A chunk of executable memory.
    275  */
    276 typedef struct IEMEXECMEMCHUNK
    277 {
    278 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    279     /** Number of free items in this chunk. */
    280     uint32_t                cFreeUnits;
    281     /** Hint where to start searching for free space in the allocation bitmap. */
    282     uint32_t                idxFreeHint;
    283 #else
    284     /** The heap handle. */
    285     RTHEAPSIMPLE            hHeap;
    286 #endif
    287     /** Pointer to the chunk. */
    288     void                   *pvChunk;
    289 #ifdef IN_RING3
    290     /**
    291      * Pointer to the unwind information.
    292      *
    293      * This is used during C++ throw and longjmp (windows and probably most other
    294      * platforms).  Some debuggers (windbg) make use of it as well.
    295      *
    296      * Windows: This is allocated from hHeap on windows because (at least for
    297      *          AMD64) the UNWIND_INFO structure address in the
    298      *          RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
    299      *
    300      * Others:  Allocated from the regular heap to avoid unnecessary executable data
    301      *          structures.  This points to an IEMEXECMEMCHUNKEHFRAME structure. */
    302     void                   *pvUnwindInfo;
    303 #elif defined(IN_RING0)
    304     /** Allocation handle. */
    305     RTR0MEMOBJ              hMemObj;
    306 #endif
    307 } IEMEXECMEMCHUNK;
    308 /** Pointer to a memory chunk. */
    309 typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
    310 
    311 
    312 /**
    313  * Executable memory allocator for the native recompiler.
    314  */
    315 typedef struct IEMEXECMEMALLOCATOR
    316 {
    317     /** Magic value (IEMEXECMEMALLOCATOR_MAGIC).  */
    318     uint32_t                uMagic;
    319 
    320     /** The chunk size. */
    321     uint32_t                cbChunk;
    322     /** The maximum number of chunks. */
    323     uint32_t                cMaxChunks;
    324     /** The current number of chunks. */
    325     uint32_t                cChunks;
    326     /** Hint where to start looking for available memory. */
    327     uint32_t                idxChunkHint;
    328     /** Statistics: Current number of allocations. */
    329     uint32_t                cAllocations;
    330 
    331     /** The total amount of memory available. */
    332     uint64_t                cbTotal;
    333     /** Total amount of free memory. */
    334     uint64_t                cbFree;
    335     /** Total amount of memory allocated. */
    336     uint64_t                cbAllocated;
    337 
    338 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    339     /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
    340      *
    341      * Since the chunk size is a power of two and the minimum chunk size is a lot
    342      * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
    343      * require a whole number of uint64_t elements in the allocation bitmap.  So,
    344      * for the sake of simplicity/laziness, they are allocated as one contiguous
    345      * chunk. */
    346     uint64_t               *pbmAlloc;
    347     /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
    348     uint32_t                cUnitsPerChunk;
    349     /** Number of bitmap elements per chunk (for quickly locating the bitmap
    350      * portion corresponding to a chunk). */
    351     uint32_t                cBitmapElementsPerChunk;
    352 #else
    353     /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
    354      * @{ */
    355     /** The size of the heap internal block header.   This is used to adjust the
    356      * requested memory size to make sure there is exactly enough room for a header at
    357      * the end of the blocks we allocate before the next 64 byte alignment line. */
    358     uint32_t                cbHeapBlockHdr;
    359     /** The size of the initial heap allocation required to make sure the first
    360      *  allocation is correctly aligned. */
    361     uint32_t                cbHeapAlignTweak;
    362     /** The alignment tweak allocation address. */
    363     void                   *pvAlignTweak;
    364     /** @} */
    365 #endif
    366 
    367 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    368     /** Pointer to the array of unwind info running parallel to aChunks (same
    369      * allocation as this structure, located after the bitmaps).
    370      * (For Windows, the structures must reside in 32-bit RVA distance to the
    371      * actual chunk, so they are allocated off the chunk.) */
    372     PIEMEXECMEMCHUNKEHFRAME paEhFrames;
    373 #endif
    374 
    375     /** The allocation chunks. */
    376     RT_FLEXIBLE_ARRAY_EXTENSION
    377     IEMEXECMEMCHUNK         aChunks[RT_FLEXIBLE_ARRAY];
    378 } IEMEXECMEMALLOCATOR;
    379 /** Pointer to an executable memory allocator. */
    380 typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
    381 
    382 /** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
    383 #define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
    384 
    385 
    386 static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
    387 
    388 
    389 /**
    390  * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
    391  * the heap statistics.
    392  */
    393 static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
    394                                                uint32_t cbReq, uint32_t idxChunk)
    395 {
    396     pExecMemAllocator->cAllocations += 1;
    397     pExecMemAllocator->cbAllocated  += cbReq;
    398 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    399     pExecMemAllocator->cbFree       -= cbReq;
    400 #else
    401     pExecMemAllocator->cbFree       -= RT_ALIGN_32(cbReq, 64);
    402 #endif
    403     pExecMemAllocator->idxChunkHint  = idxChunk;
    404 
    405 #ifdef RT_OS_DARWIN
    406     /*
    407      * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
    408      * on darwin.  So, we mark the pages returned as read+write after alloc and
    409      * expect the caller to call iemExecMemAllocatorReadyForUse when done
    410      * writing to the allocation.
    411      *
    412      * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
    413      * for details.
    414      */
    415     /** @todo detect if this is necessary... it wasn't required on 10.15 or
    416      *        whatever older version it was. */
    417     int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
    418     AssertRC(rc);
    419 #endif
    420 
    421     return pvRet;
    422 }
    423 
    424 
    425 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    426 static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
    427                                                 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
    428 {
    429     /*
    430      * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
    431      */
    432     Assert(!(cToScan & 63));
    433     Assert(!(idxFirst & 63));
    434     Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
    435     pbmAlloc += idxFirst / 64;
    436 
    437     /*
    438      * Scan the bitmap for cReqUnits consecutive clear bits
    439      */
    440     /** @todo This can probably be done more efficiently for non-x86 systems. */
    441     int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
    442     while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
    443     {
    444         uint32_t idxAddBit = 1;
    445         while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
    446             idxAddBit++;
    447         if (idxAddBit >= cReqUnits)
    448         {
    449             ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
    450 
    451             PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
    452             pChunk->cFreeUnits -= cReqUnits;
    453             pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
    454 
    455             void * const pvRet  = (uint8_t *)pChunk->pvChunk
    456                                 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
    457 
    458             return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
    459                                                     cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
    460         }
    461 
    462         iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
    463     }
    464     return NULL;
    465 }
    466 #endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
    467 
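The function above is a first-fit search for cReqUnits consecutive clear bits. The core idea in isolation, over a single 64-bit bitmap word (standalone sketch, not the IPRT bit helpers):

    #include <stdint.h>

    /* Returns the first bit index with cReqUnits consecutive clear bits in
       bmAlloc, or -1 if there is no such run (assumes 0 < cReqUnits <= 64). */
    static int exampleFindFirstFit(uint64_t bmAlloc, unsigned cReqUnits)
    {
        uint64_t const fMask = cReqUnits >= 64 ? UINT64_MAX : (UINT64_C(1) << cReqUnits) - 1;
        for (unsigned iBit = 0; iBit + cReqUnits <= 64; iBit++)
            if (!(bmAlloc & (fMask << iBit)))   /* all cReqUnits bits clear at iBit? */
                return (int)iBit;
        return -1;
    }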
    468 
    469 static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
    470 {
    471 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    472     /*
    473      * Figure out how much to allocate.
    474      */
    475     uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    476     if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
    477     {
    478         uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
    479         uint32_t const   idxHint  = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
    480         if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
    481         {
    482             void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
    483                                                              pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
    484             if (pvRet)
    485                 return pvRet;
    486         }
    487         return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
    488                                                   RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
    489                                                   cReqUnits, idxChunk);
    490     }
    491 #else
    492     void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
    493     if (pvRet)
    494         return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
    495 #endif
    496     return NULL;
    497 
    498 }
    499 
    500 
    501 /**
    502  * Allocates @a cbReq bytes of executable memory.
    503  *
    504  * @returns Pointer to the memory, NULL if out of memory or other problem
    505  *          encountered.
    506  * @param   pVCpu   The cross context virtual CPU structure of the calling
    507  *                  thread.
    508  * @param   cbReq   How many bytes are required.
    509  */
    510 static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
    511 {
    512     PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
    513     AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
    514     AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
    515 
    516 
    517     for (unsigned iIteration = 0;; iIteration++)
    518     {
    519         /*
    520          * Adjust the request size so it'll fit the allocator alignment/whatnot.
    521          *
    522          * For the RTHeapSimple allocator this means to follow the logic described
    523          * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
    524          * existing chunks if we think we've got sufficient free memory around.
    525          *
    526          * While for the alternative one we just align it up to a whole unit size.
    527          */
    528 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    529         cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    530 #else
    531         cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
    532 #endif
    533         if (cbReq <= pExecMemAllocator->cbFree)
    534         {
    535             uint32_t const cChunks      = pExecMemAllocator->cChunks;
    536             uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
    537             for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
    538             {
    539                 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
    540                 if (pvRet)
    541                     return pvRet;
    542             }
    543             for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
    544             {
    545                 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
    546                 if (pvRet)
    547                     return pvRet;
    548             }
    549         }
    550 
    551         /*
    552          * Can we grow it with another chunk?
    553          */
    554         if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
    555         {
    556             int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
    557             AssertLogRelRCReturn(rc, NULL);
    558 
    559             uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
    560             void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
    561             if (pvRet)
    562                 return pvRet;
    563             AssertFailed();
    564         }
    565 
    566         /*
    567          * Try prune native TBs once.
    568          */
    569         if (iIteration == 0)
    570             iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
    571         else
    572         {
    573             /** @todo stats...   */
    574             return NULL;
    575         }
    576     }
    577 
    578 }
    579 
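In outline, each iteration of the loop above does the following (illustrative summary):

    /* (1) round cbReq up for the active sub-allocator;
       (2) first-fit scan all chunks, starting at idxChunkHint;
       (3) if below cMaxChunks, grow by one chunk and allocate from it;
       (4) on the first complete failure, prune native TBs and retry once;
       (5) otherwise give up and return NULL. */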
    580 
    581 /** This is a hook that we may need later for changing memory protection back
    582  *  to readonly+exec */
    583 static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
    584 {
    585 #ifdef RT_OS_DARWIN
    586     /* See iemExecMemAllocatorAllocTailCode for the explanation. */
    587     int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
    588     AssertRC(rc); RT_NOREF(pVCpu);
    589 
    590     /*
    591      * Flush the instruction cache:
    592      *      https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
    593      */
    594     /* sys_dcache_flush(pv, cb); - not necessary */
    595     sys_icache_invalidate(pv, cb);
    596 #else
    597     RT_NOREF(pVCpu, pv, cb);
    598 #endif
    599 }
    600 
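A typical caller sequence on a W^X host like darwin/arm64 then looks as follows (hypothetical sketch; pabNativeInstrs and cbNative are made-up names, error handling elided):

    void *pvCode = iemExecMemAllocatorAlloc(pVCpu, cbNative);     /* returned read+write on darwin */
    if (pvCode)
    {
        memcpy(pvCode, pabNativeInstrs, cbNative);                /* emit/copy the native code */
        iemExecMemAllocatorReadyForUse(pVCpu, pvCode, cbNative);  /* flip to read+exec, flush icache */
    }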
    601 
    602 /**
    603  * Frees executable memory.
    604  */
    605 void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
    606 {
    607     PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
    608     Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
    609     Assert(pv);
    610 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    611     Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
    612 #else
    613     Assert(!((uintptr_t)pv & 63));
    614 #endif
    615 
    616     /* Align the size as we did when allocating the block. */
    617 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    618     cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    619 #else
    620     cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
    621 #endif
    622 
    623     /* Free it / assert sanity. */
    624 #if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
    625     uint32_t const cChunks = pExecMemAllocator->cChunks;
    626     uint32_t const cbChunk = pExecMemAllocator->cbChunk;
    627     bool           fFound  = false;
    628     for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
    629     {
    630         uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
    631         fFound = offChunk < cbChunk;
    632         if (fFound)
    633         {
    634 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    635             uint32_t const idxFirst  = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    636             uint32_t const cReqUnits = (uint32_t)cb       >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    637 
    638             /* Check that it's valid and free it. */
    639             uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
    640             AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
    641             for (uint32_t i = 1; i < cReqUnits; i++)
    642                 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
    643             ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
    644 
    645             pExecMemAllocator->aChunks[idxChunk].cFreeUnits  += cReqUnits;
    646             pExecMemAllocator->aChunks[idxChunk].idxFreeHint  = idxFirst;
    647 
    648             /* Update the stats. */
    649             pExecMemAllocator->cbAllocated  -= cb;
    650             pExecMemAllocator->cbFree       += cb;
    651             pExecMemAllocator->cAllocations -= 1;
    652             return;
    653 #else
    654             Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
    655             break;
    656 #endif
    657         }
    658     }
    659 # ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    660     AssertFailed();
    661 # else
    662     Assert(fFound);
    663 # endif
    664 #endif
    665 
    666 #ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    667     /* Update stats while cb is freshly calculated. */
    668     pExecMemAllocator->cbAllocated  -= cb;
    669     pExecMemAllocator->cbFree       += RT_ALIGN_Z(cb, 64);
    670     pExecMemAllocator->cAllocations -= 1;
    671 
    672     /* Free it. */
    673     RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
    674 #endif
    675 }
    676 
    677 
    678 
    679 #ifdef IN_RING3
    680 # ifdef RT_OS_WINDOWS
    681 
    682 /**
    683  * Initializes the unwind info structures for windows hosts.
    684  */
    685 static int
    686 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
    687                                                      void *pvChunk, uint32_t idxChunk)
    688 {
    689     RT_NOREF(pVCpu);
    690 
    691     /*
    692      * The AMD64 unwind opcodes.
    693      *
    694      * This is a program that starts with RSP after a RET instruction that
    695      * ends up in recompiled code, and the operations we describe here will
    696      * restore all non-volatile registers and bring RSP back to where our
    697      * RET address is.  This means it's reverse order from what happens in
    698      * the prologue.
    699      *
    700      * Note! Using a frame register approach here, partly because we have one
    701      *       but mainly because the UWOP_ALLOC_LARGE argument values
    702      *       would be a pain to write initializers for.  On the positive
    703      *       side, we're impervious to changes in the stack variable
    704      *       area and can deal with dynamic stack allocations if necessary.
    705      */
    706     static const IMAGE_UNWIND_CODE s_aOpcodes[] =
    707     {
    708         { { 16, IMAGE_AMD64_UWOP_SET_FPREG,     0 } },              /* RSP  = RBP - FrameOffset * 10 (0x60) */
    709         { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL,   0 } },              /* RSP += 8; */
    710         { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x15 } },   /* R15  = [RSP]; RSP += 8; */
    711         { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x14 } },   /* R14  = [RSP]; RSP += 8; */
    712         { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x13 } },   /* R13  = [RSP]; RSP += 8; */
    713         { {  8, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x12 } },   /* R12  = [RSP]; RSP += 8; */
    714         { {  7, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xDI } },   /* RDI  = [RSP]; RSP += 8; */
    715         { {  6, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xSI } },   /* RSI  = [RSP]; RSP += 8; */
    716         { {  5, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xBX } },   /* RBX  = [RSP]; RSP += 8; */
    717         { {  4, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xBP } },   /* RBP  = [RSP]; RSP += 8; */
    718     };
    719     union
    720     {
    721         IMAGE_UNWIND_INFO Info;
    722         uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
    723     } s_UnwindInfo =
    724     {
    725         {
    726             /* .Version = */        1,
    727             /* .Flags = */          0,
    728             /* .SizeOfProlog = */   16, /* whatever */
    729             /* .CountOfCodes = */   RT_ELEMENTS(s_aOpcodes),
    730             /* .FrameRegister = */  X86_GREG_xBP,
    731             /* .FrameOffset = */    (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
    732         }
    733     };
    734     AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
    735     AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
    736 
    737     /*
    738      * Calc how much space we need and allocate it off the exec heap.
    739      */
    740     unsigned const cFunctionEntries = 1;
    741     unsigned const cbUnwindInfo     = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
    742     unsigned const cbNeeded         = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
    743 #  ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    744     unsigned const cbNeededAligned  = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    745     PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
    746         = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
    747 #  else
    748     unsigned const cbNeededAligned  = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
    749                                     - pExecMemAllocator->cbHeapBlockHdr;
    750     PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
    751                                                                                                        32 /*cbAlignment*/);
    752 #  endif
    753     AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
    754     pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
    755 
    756     /*
    757      * Initialize the structures.
    758      */
    759     PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
    760 
    761     paFunctions[0].BeginAddress         = 0;
    762     paFunctions[0].EndAddress           = pExecMemAllocator->cbChunk;
    763     paFunctions[0].UnwindInfoAddress    = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
    764 
    765     memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
    766     memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
    767 
    768     /*
    769      * Register it.
    770      */
    771     uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
    772     AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
    773 
    774     return VINF_SUCCESS;
    775 }
    776 
    777 
    778 # else /* !RT_OS_WINDOWS */
    779 
    780 /**
    781  * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
    782  */
    783 DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
    784 {
    785     if (iValue >= 64)
    786     {
    787         Assert(iValue < 0x2000);
    788         *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
    789         *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
    790     }
    791     else if (iValue >= 0)
    792         *Ptr.pb++ = (uint8_t)iValue;
    793     else if (iValue > -64)
    794         *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
    795     else
    796     {
    797         Assert(iValue > -0x2000);
    798         *Ptr.pb++ = ((uint8_t)iValue & 0x7f)        | 0x80;
    799         *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
    800     }
    801     return Ptr;
    802 }
    803 
    804 
    805 /**
    806  * Emits a ULEB128 encoded value (up to 64-bit wide).
    807  */
    808 DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
    809 {
    810     while (uValue >= 0x80)
    811     {
    812         *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
    813         uValue  >>= 7;
    814     }
    815     *Ptr.pb++ = (uint8_t)uValue;
    816     return Ptr;
    817 }
    818 
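A few hand-computed encodings as a sanity check on the two emitters above (illustrative):

    /* iemDwarfPutUleb128(Ptr, 624485) -> 0xe5 0x8e 0x26  (the classic DWARF example)
       iemDwarfPutLeb128(Ptr, 100)     -> 0xe4 0x00       (values >= 64 need a 2nd byte)
       iemDwarfPutLeb128(Ptr, -8)      -> 0x78            (the data alignment factor used below)
       iemDwarfPutLeb128(Ptr, -100)    -> 0x9c 0x7f                                             */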
    819 
    820 /**
    821  * Emits a CFA rule as register @a uReg + offset @a off.
    822  */
    823 DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
    824 {
    825     *Ptr.pb++ = DW_CFA_def_cfa;
    826     Ptr = iemDwarfPutUleb128(Ptr, uReg);
    827     Ptr = iemDwarfPutUleb128(Ptr, off);
    828     return Ptr;
    829 }
    830 
    831 
    832 /**
    833  * Emits a register (@a uReg) save location:
    834  *      CFA + @a off * data_alignment_factor
    835  */
    836 DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
    837 {
    838     if (uReg < 0x40)
    839         *Ptr.pb++ = DW_CFA_offset | uReg;
    840     else
    841     {
    842         *Ptr.pb++ = DW_CFA_offset_extended;
    843         Ptr = iemDwarfPutUleb128(Ptr, uReg);
    844     }
    845     Ptr = iemDwarfPutUleb128(Ptr, off);
    846     return Ptr;
    847 }
    848 
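Concretely, the AMD64 initial instructions emitted further down serialize to these bytes, assuming the usual DWARF constants DW_CFA_def_cfa = 0x0c, DW_CFA_offset = 0x80, DWREG_AMD64_RBP = 6 and DWREG_AMD64_RA = 16 (illustrative):

    /* iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16) -> 0x0c 0x06 0x10  (CFA = RBP + 16)
       iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,  1)  -> 0x90 0x01       (0x80 | 16, then ULEB 1) */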
    849 
    850 #  if 0 /* unused */
    851 /**
    852  * Emits a register (@a uReg) save location, using signed offset:
    853  *      CFA + @a offSigned * data_alignment_factor
    854  */
    855 DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
    856 {
    857     *Ptr.pb++ = DW_CFA_offset_extended_sf;
    858     Ptr = iemDwarfPutUleb128(Ptr, uReg);
    859     Ptr = iemDwarfPutLeb128(Ptr, offSigned);
    860     return Ptr;
    861 }
    862 #  endif
    863 
    864 
    865 /**
    866  * Initializes the unwind info section for non-windows hosts.
    867  */
    868 static int
    869 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
    870                                                      void *pvChunk, uint32_t idxChunk)
    871 {
    872     PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
    873     pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
    874 
    875     RTPTRUNION Ptr = { pEhFrame->abEhFrame };
    876 
    877     /*
    878      * Generate the CIE first.
    879      */
    880 #  ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
    881     uint8_t const iDwarfVer = 3;
    882 #  else
    883     uint8_t const iDwarfVer = 4;
    884 #  endif
    885     RTPTRUNION const PtrCie = Ptr;
    886     *Ptr.pu32++ = 123;                                      /* The CIE length will be determined later. */
    887     *Ptr.pu32++ = 0 /*UINT32_MAX*/;                         /* I'm a CIE in .eh_frame speak. */
    888     *Ptr.pb++   = iDwarfVer;                                /* DWARF version */
    889     *Ptr.pb++   = 0;                                        /* Augmentation. */
    890     if (iDwarfVer >= 4)
    891     {
    892         *Ptr.pb++   = sizeof(uintptr_t);                    /* Address size. */
    893         *Ptr.pb++   = 0;                                    /* Segment selector size. */
    894     }
    895 #  ifdef RT_ARCH_AMD64
    896     Ptr = iemDwarfPutLeb128(Ptr, 1);                        /* Code alignment factor (LEB128 = 1). */
    897 #  else
    898     Ptr = iemDwarfPutLeb128(Ptr, 4);                        /* Code alignment factor (LEB128 = 4). */
    899 #  endif
    900     Ptr = iemDwarfPutLeb128(Ptr, -8);                       /* Data alignment factor (LEB128 = -8). */
    901 #  ifdef RT_ARCH_AMD64
    902     Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA);          /* Return address column (ULEB128) */
    903 #  elif defined(RT_ARCH_ARM64)
    904     Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR);          /* Return address column (ULEB128) */
    905 #  else
    906 #   error "port me"
    907 #  endif
    908     /* Initial instructions: */
    909 #  ifdef RT_ARCH_AMD64
    910     Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16);   /* CFA     = RBP + 0x10 - first stack parameter */
    911     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,  1);    /* Ret RIP = [CFA + 1*-8] */
    912     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2);    /* RBP     = [CFA + 2*-8] */
    913     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3);    /* RBX     = [CFA + 3*-8] */
    914     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4);    /* R12     = [CFA + 4*-8] */
    915     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5);    /* R13     = [CFA + 5*-8] */
    916     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6);    /* R14     = [CFA + 6*-8] */
    917     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7);    /* R15     = [CFA + 7*-8] */
    918 #  elif defined(RT_ARCH_ARM64)
    919 #   if 1
    920     Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP,  16);   /* CFA     = BP + 0x10 - first stack parameter */
    921 #   else
    922     Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP,  IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
    923 #   endif
    924     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR,   1);   /* Ret PC  = [CFA + 1*-8] */
    925     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP,   2);   /* Ret BP  = [CFA + 2*-8] */
    926     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28,  3);   /* X28     = [CFA + 3*-8] */
    927     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27,  4);   /* X27     = [CFA + 4*-8] */
    928     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26,  5);   /* X26     = [CFA + 5*-8] */
    929     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25,  6);   /* X25     = [CFA + 6*-8] */
    930     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24,  7);   /* X24     = [CFA + 7*-8] */
    931     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23,  8);   /* X23     = [CFA + 8*-8] */
    932     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22,  9);   /* X22     = [CFA + 9*-8] */
    933     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10);   /* X21     = [CFA +10*-8] */
    934     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11);   /* X20     = [CFA +11*-8] */
    935     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12);   /* X19     = [CFA +12*-8] */
    936     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    937     /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
    938 #  else
    939 #   error "port me"
    940 #  endif
    941     while ((Ptr.u - PtrCie.u) & 3)
    942         *Ptr.pb++ = DW_CFA_nop;
    943     /* Finalize the CIE size. */
    944     *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
    945 
    946     /*
    947      * Generate an FDE for the whole chunk area.
    948      */
    949 #  ifdef IEMNATIVE_USE_LIBUNWIND
    950     pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
    951 #  endif
    952     RTPTRUNION const PtrFde = Ptr;
    953     *Ptr.pu32++ = 123;                                      /* The FDE length will be determined later. */
    954     *Ptr.pu32   = Ptr.u - PtrCie.u;                         /* Negated self relative CIE address. */
    955     Ptr.pu32++;
    956     *Ptr.pu64++ = (uintptr_t)pvChunk;                       /* Absolute start PC of this FDE. */
    957     *Ptr.pu64++ = pExecMemAllocator->cbChunk;               /* PC range length for this FDE. */
    958 #  if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
    959     *Ptr.pb++ = DW_CFA_nop;
    960 #  endif
    961     while ((Ptr.u - PtrFde.u) & 3)
    962         *Ptr.pb++ = DW_CFA_nop;
    963     /* Finalize the FDE size. */
    964     *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
    965 
    966     /* Terminator entry. */
    967     *Ptr.pu32++ = 0;
    968     *Ptr.pu32++ = 0;            /* just to be sure... */
    969     Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
    970 
    971     /*
    972      * Register it.
    973      */
    974 #  ifdef IEMNATIVE_USE_LIBUNWIND
    975     __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
    976 #  else
    977     memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
    978     __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
    979 #  endif
    980 
    981 #  ifdef IEMNATIVE_USE_GDB_JIT
    982     /*
    983      * Now for telling GDB about this (experimental).
    984      *
    985      * This seems to work best with ET_DYN.
    986      */
    987     unsigned const cbNeeded        = sizeof(GDBJITSYMFILE);
    988 #   ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    989     unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    990     GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
    991 #   else
    992     unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
    993                                    - pExecMemAllocator->cbHeapBlockHdr;
    994     GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
    995 #   endif
    996     AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
    997     unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
    998 
    999     RT_ZERO(*pSymFile);
    1000 
    1001     /*
    1002      * The ELF header:
    1003      */
    1004     pSymFile->EHdr.e_ident[0]           = ELFMAG0;
    1005     pSymFile->EHdr.e_ident[1]           = ELFMAG1;
    1006     pSymFile->EHdr.e_ident[2]           = ELFMAG2;
    1007     pSymFile->EHdr.e_ident[3]           = ELFMAG3;
    1008     pSymFile->EHdr.e_ident[EI_VERSION]  = EV_CURRENT;
    1009     pSymFile->EHdr.e_ident[EI_CLASS]    = ELFCLASS64;
    1010     pSymFile->EHdr.e_ident[EI_DATA]     = ELFDATA2LSB;
    1011     pSymFile->EHdr.e_ident[EI_OSABI]    = ELFOSABI_NONE;
    1012 #   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    1013     pSymFile->EHdr.e_type               = ET_DYN;
    1014 #   else
    1015     pSymFile->EHdr.e_type               = ET_REL;
    1016 #   endif
    1017 #   ifdef RT_ARCH_AMD64
    1018     pSymFile->EHdr.e_machine            = EM_AMD64;
    1019 #   elif defined(RT_ARCH_ARM64)
    1020     pSymFile->EHdr.e_machine            = EM_AARCH64;
    1021 #   else
    1022 #    error "port me"
    1023 #   endif
    1024     pSymFile->EHdr.e_version            = 1; /*?*/
    1025     pSymFile->EHdr.e_entry              = 0;
    1026 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1027     pSymFile->EHdr.e_phoff              = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
    1028 #   else
    1029     pSymFile->EHdr.e_phoff              = 0;
    1030 #   endif
    1031     pSymFile->EHdr.e_shoff              = sizeof(pSymFile->EHdr);
    1032     pSymFile->EHdr.e_flags              = 0;
    1033     pSymFile->EHdr.e_ehsize             = sizeof(pSymFile->EHdr);
    1034 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1035     pSymFile->EHdr.e_phentsize          = sizeof(pSymFile->aPhdrs[0]);
    1036     pSymFile->EHdr.e_phnum              = RT_ELEMENTS(pSymFile->aPhdrs);
    1037 #   else
    1038     pSymFile->EHdr.e_phentsize          = 0;
    1039     pSymFile->EHdr.e_phnum              = 0;
    1040 #   endif
    1041     pSymFile->EHdr.e_shentsize          = sizeof(pSymFile->aShdrs[0]);
    1042     pSymFile->EHdr.e_shnum              = RT_ELEMENTS(pSymFile->aShdrs);
    1043     pSymFile->EHdr.e_shstrndx           = 0; /* set later */
    1044 
    1045     uint32_t offStrTab = 0;
    1046 #define APPEND_STR(a_szStr) do { \
    1047         memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
    1048         offStrTab += sizeof(a_szStr); \
    1049         Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
    1050     } while (0)
    1051 #define APPEND_STR_FMT(a_szStr, ...) do { \
    1052         offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
    1053         offStrTab++; \
    1054         Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
    1055     } while (0)
    1056 
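After the section headers below have been filled in, szzStrTab holds a packed sequence of NUL-terminated names, and each sh_name field holds the byte offset of its string, e.g. (illustrative):

    /* szzStrTab: "\0.eh_frame\0.shstrtab\0.symtab\0..."
       offsets:    0 = "", 1 = ".eh_frame", 11 = ".shstrtab", 21 = ".symtab", ... */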
    1057     /*
    1058      * Section headers.
    1059      */
    1060     /* Section header #0: NULL */
    1061     unsigned i = 0;
    1062     APPEND_STR("");
    1063     RT_ZERO(pSymFile->aShdrs[i]);
    1064     i++;
    1065 
    1066     /* Section header: .eh_frame */
    1067     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1068     APPEND_STR(".eh_frame");
    1069     pSymFile->aShdrs[i].sh_type         = SHT_PROGBITS;
    1070     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC | SHF_EXECINSTR;
    1071 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    1072     pSymFile->aShdrs[i].sh_offset
    1073         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
    1074 #   else
    1075     pSymFile->aShdrs[i].sh_addr         = (uintptr_t)&pSymFile->abEhFrame[0];
    1076     pSymFile->aShdrs[i].sh_offset       = 0;
    1077 #   endif
    1078 
    1079     pSymFile->aShdrs[i].sh_size         = sizeof(pEhFrame->abEhFrame);
    1080     pSymFile->aShdrs[i].sh_link         = 0;
    1081     pSymFile->aShdrs[i].sh_info         = 0;
    1082     pSymFile->aShdrs[i].sh_addralign    = 1;
    1083     pSymFile->aShdrs[i].sh_entsize      = 0;
    1084     memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
    1085     i++;
    1086 
    1087     /* Section header: .shstrtab */
    1088     unsigned const iShStrTab = i;
    1089     pSymFile->EHdr.e_shstrndx           = iShStrTab;
    1090     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1091     APPEND_STR(".shstrtab");
    1092     pSymFile->aShdrs[i].sh_type         = SHT_STRTAB;
    1093     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1094 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    1095     pSymFile->aShdrs[i].sh_offset
    1096         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
    1097 #   else
    1098     pSymFile->aShdrs[i].sh_addr         = (uintptr_t)&pSymFile->szzStrTab[0];
    1099     pSymFile->aShdrs[i].sh_offset       = 0;
    1100 #   endif
    1101     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->szzStrTab);
    1102     pSymFile->aShdrs[i].sh_link         = 0;
    1103     pSymFile->aShdrs[i].sh_info         = 0;
    1104     pSymFile->aShdrs[i].sh_addralign    = 1;
    1105     pSymFile->aShdrs[i].sh_entsize      = 0;
    1106     i++;
    1107 
    1108     /* Section header: .symtab */
    1109     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1110     APPEND_STR(".symtab");
    1111     pSymFile->aShdrs[i].sh_type         = SHT_SYMTAB;
    1112     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1113     pSymFile->aShdrs[i].sh_offset
    1114         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
    1115     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aSymbols);
    1116     pSymFile->aShdrs[i].sh_link         = iShStrTab;
    1117     pSymFile->aShdrs[i].sh_info         = RT_ELEMENTS(pSymFile->aSymbols);
    1118     pSymFile->aShdrs[i].sh_addralign    = sizeof(pSymFile->aSymbols[0].st_value);
    1119     pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aSymbols[0]);
    1120     i++;
    1121 
    1122 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1123     /* Section header: .dynsym */
    1124     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1125     APPEND_STR(".dynsym");
    1126     pSymFile->aShdrs[i].sh_type         = SHT_DYNSYM;
    1127     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1128     pSymFile->aShdrs[i].sh_offset
    1129         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
    1130     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aDynSyms);
    1131     pSymFile->aShdrs[i].sh_link         = iShStrTab;
    1132     pSymFile->aShdrs[i].sh_info         = RT_ELEMENTS(pSymFile->aDynSyms);
    1133     pSymFile->aShdrs[i].sh_addralign    = sizeof(pSymFile->aDynSyms[0].st_value);
    1134     pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aDynSyms[0]);
    1135     i++;
    1136 #   endif
    1137 
    1138 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1139     /* Section header: .dynamic */
    1140     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1141     APPEND_STR(".dynamic");
    1142     pSymFile->aShdrs[i].sh_type         = SHT_DYNAMIC;
    1143     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1144     pSymFile->aShdrs[i].sh_offset
    1145         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
    1146     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aDyn);
    1147     pSymFile->aShdrs[i].sh_link         = iShStrTab;
    1148     pSymFile->aShdrs[i].sh_info         = 0;
    1149     pSymFile->aShdrs[i].sh_addralign    = 1;
    1150     pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aDyn[0]);
    1151     i++;
    1152 #   endif
    1153 
    1154     /* Section header: .text */
    1155     unsigned const iShText = i;
    1156     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1157     APPEND_STR(".text");
    1158     pSymFile->aShdrs[i].sh_type         = SHT_PROGBITS;
    1159     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC | SHF_EXECINSTR;
    1160 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    1161     pSymFile->aShdrs[i].sh_offset
    1162         = pSymFile->aShdrs[i].sh_addr   = sizeof(GDBJITSYMFILE);
    1163 #   else
    1164     pSymFile->aShdrs[i].sh_addr         = (uintptr_t)(pSymFile + 1);
    1165     pSymFile->aShdrs[i].sh_offset       = 0;
    1166 #   endif
    1167     pSymFile->aShdrs[i].sh_size         = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
    1168     pSymFile->aShdrs[i].sh_link         = 0;
    1169     pSymFile->aShdrs[i].sh_info         = 0;
    1170     pSymFile->aShdrs[i].sh_addralign    = 1;
    1171     pSymFile->aShdrs[i].sh_entsize      = 0;
    1172     i++;
    1173 
    1174     Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
    1175 
    1176 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1177     /*
    1178      * The program headers:
    1179      */
    1180     /* Everything in a single LOAD segment: */
    1181     i = 0;
    1182     pSymFile->aPhdrs[i].p_type          = PT_LOAD;
    1183     pSymFile->aPhdrs[i].p_flags         = PF_X | PF_R;
    1184     pSymFile->aPhdrs[i].p_offset
    1185         = pSymFile->aPhdrs[i].p_vaddr
    1186         = pSymFile->aPhdrs[i].p_paddr   = 0;
    1187     pSymFile->aPhdrs[i].p_filesz         /* Size of segment in file. */
    1188         = pSymFile->aPhdrs[i].p_memsz   = pExecMemAllocator->cbChunk - offSymFileInChunk;
    1189     pSymFile->aPhdrs[i].p_align         = HOST_PAGE_SIZE;
    1190     i++;
    1191     /* The .dynamic segment. */
    1192     pSymFile->aPhdrs[i].p_type          = PT_DYNAMIC;
    1193     pSymFile->aPhdrs[i].p_flags         = PF_R;
    1194     pSymFile->aPhdrs[i].p_offset
    1195         = pSymFile->aPhdrs[i].p_vaddr
    1196         = pSymFile->aPhdrs[i].p_paddr   = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
    1197     pSymFile->aPhdrs[i].p_filesz         /* Size of segment in file. */
    1198         = pSymFile->aPhdrs[i].p_memsz   = sizeof(pSymFile->aDyn);
    1199     pSymFile->aPhdrs[i].p_align         = sizeof(pSymFile->aDyn[0].d_tag);
    1200     i++;
    1201 
    1202     Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
    1203 
    1204     /*
    1205      * The dynamic section:
    1206      */
    1207     i = 0;
    1208     pSymFile->aDyn[i].d_tag             = DT_SONAME;
    1209     pSymFile->aDyn[i].d_un.d_val        = offStrTab;
    1210     APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
    1211     i++;
    1212     pSymFile->aDyn[i].d_tag             = DT_STRTAB;
    1213     pSymFile->aDyn[i].d_un.d_ptr        = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
    1214     i++;
    1215     pSymFile->aDyn[i].d_tag             = DT_STRSZ;
    1216     pSymFile->aDyn[i].d_un.d_val        = sizeof(pSymFile->szzStrTab);
    1217     i++;
    1218     pSymFile->aDyn[i].d_tag             = DT_SYMTAB;
    1219     pSymFile->aDyn[i].d_un.d_ptr        = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
    1220     i++;
    1221     pSymFile->aDyn[i].d_tag             = DT_SYMENT;
    1222     pSymFile->aDyn[i].d_un.d_val        = sizeof(pSymFile->aDynSyms[0]);
    1223     i++;
    1224     pSymFile->aDyn[i].d_tag             = DT_NULL;
    1225     i++;
    1226     Assert(i == RT_ELEMENTS(pSymFile->aDyn));
    1227 #   endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
    1228 
    1229     /*
    1230      * Symbol tables:
    1231      */
    1232     /** @todo gdb doesn't seem to really like this ...   */
    1233     i = 0;
    1234     pSymFile->aSymbols[i].st_name       = 0;
    1235     pSymFile->aSymbols[i].st_shndx      = SHN_UNDEF;
    1236     pSymFile->aSymbols[i].st_value      = 0;
    1237     pSymFile->aSymbols[i].st_size       = 0;
    1238     pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
    1239     pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
    1240 #   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    1241     pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
    1242 #   endif
    1243     i++;
    1244 
    1245     pSymFile->aSymbols[i].st_name       = 0;
    1246     pSymFile->aSymbols[i].st_shndx      = SHN_ABS;
    1247     pSymFile->aSymbols[i].st_value      = 0;
    1248     pSymFile->aSymbols[i].st_size       = 0;
    1249     pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
    1250     pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
    1251     i++;
    1252 
    1253     pSymFile->aSymbols[i].st_name       = offStrTab;
    1254     APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
    1255 #   if 0
    1256     pSymFile->aSymbols[i].st_shndx      = iShText;
    1257     pSymFile->aSymbols[i].st_value      = 0;
    1258 #   else
    1259     pSymFile->aSymbols[i].st_shndx      = SHN_ABS;
    1260     pSymFile->aSymbols[i].st_value      = (uintptr_t)(pSymFile + 1);
    1261 #   endif
    1262     pSymFile->aSymbols[i].st_size       = pSymFile->aShdrs[iShText].sh_size;
    1263     pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
    1264     pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
    1265 #   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    1266     pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
    1267     pSymFile->aDynSyms[1].st_value      = (uintptr_t)(pSymFile + 1);
    1268 #   endif
    1269     i++;
    1270 
    1271     Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
    1272     Assert(offStrTab < sizeof(pSymFile->szzStrTab));
    1273 
    1274     /*
    1275      * The GDB JIT entry and informing GDB.
    1276      */
    1277     pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
    1278 #   if 1
    1279     pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
    1280 #   else
    1281     pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
    1282 #   endif
    1283 
    1284     RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
    1285     RTCritSectEnter(&g_IemNativeGdbJitLock);
    1286     pEhFrame->GdbJitEntry.pNext      = NULL;
    1287     pEhFrame->GdbJitEntry.pPrev      = __jit_debug_descriptor.pTail;
    1288     if (__jit_debug_descriptor.pTail)
    1289         __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
    1290     else
    1291         __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
    1292     __jit_debug_descriptor.pTail     = &pEhFrame->GdbJitEntry;
    1293     __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
    1294 
    1295     /* Notify GDB: */
    1296     __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
    1297     __jit_debug_register_code();
    1298     __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
    1299     RTCritSectLeave(&g_IemNativeGdbJitLock);
    1300 
    1301 #  else  /* !IEMNATIVE_USE_GDB_JIT */
    1302     RT_NOREF(pVCpu);
    1303 #  endif /* !IEMNATIVE_USE_GDB_JIT */
    1304 
    1305     return VINF_SUCCESS;
    1306 }
    1307 
    1308 # endif /* !RT_OS_WINDOWS */
    1309 #endif /* IN_RING3 */
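For reference, the registration logic above implements GDB's documented JIT
interface: the debugger plants a breakpoint in the deliberately empty
__jit_debug_register_code() function and, each time it fires, walks the entry
list hanging off __jit_debug_descriptor. A minimal sketch of the canonical
declarations, using the names from the GDB manual rather than the VBox
wrappers above:

typedef enum { JIT_NOACTION = 0, JIT_REGISTER_FN, JIT_UNREGISTER_FN } jit_actions_t;

struct jit_code_entry
{
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const char            *symfile_addr;   /* the in-memory ELF image (GDBJITSYMFILE above). */
    uint64_t               symfile_size;
};

struct jit_descriptor
{
    uint32_t               version;        /* must be 1. */
    uint32_t               action_flag;    /* jit_actions_t value for the relevant entry. */
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB sets a breakpoint here; it must not be inlined or optimized away. */
void __attribute__((noinline)) __jit_debug_register_code(void) { }

struct jit_descriptor __jit_debug_descriptor = { 1, JIT_NOACTION, NULL, NULL };

The code above maintains the same doubly linked list (pHead/pTail/pRelevant)
and toggles enmAction around the notification call, mirroring this protocol
one to one.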
    1310 
    1311 
    1312 /**
    1313  * Adds another chunk to the executable memory allocator.
    1314  *
    1315  * This is used by the init code for the initial allocation and later by the
    1316  * regular allocator function when it's out of memory.
    1317  */
    1318 static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
    1319 {
    1320     /* Check that we've room for growth. */
    1321     uint32_t const idxChunk = pExecMemAllocator->cChunks;
    1322     AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
    1323 
    1324     /* Allocate a chunk. */
    1325 #ifdef RT_OS_DARWIN
    1326     void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
    1327 #else
    1328     void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
    1329 #endif
    1330     AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
    1331 
    1332 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1333     int rc = VINF_SUCCESS;
    1334 #else
    1335     /* Initialize the heap for the chunk. */
    1336     RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
    1337     int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
    1338     AssertRC(rc);
    1339     if (RT_SUCCESS(rc))
    1340     {
    1341         /*
    1342          * We want the memory to be aligned on a 64 byte boundary, so the first time thru
    1343          * here we do some exploratory allocations to see how we can achieve this.
    1344          * On subsequent runs we only make an initial adjustment allocation, if
    1345          * necessary.
    1346          *
    1347          * Since we own the heap implementation, we know that the internal block
    1348          * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
    1349          * so all we need to do wrt allocation size adjustments is to add 32 bytes
    1350          * to the size, align up by 64 bytes, and subtract 32 bytes.
    1351          *
    1352          * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
    1353          * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
    1354          * allocation to force subsequent allocations to return 64 byte aligned
    1355          * user areas.
    1356          */
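        /* Worked example of the rule above (assuming the 32 byte block
           header): to keep user areas 64 byte aligned, every allocation
           size S must satisfy (S + 32) % 64 == 0.  For a nominal 256 byte
           request: S = RT_ALIGN_32(256 + 32, 64) - 32 = 320 - 32 = 288.
           Starting from a 64 byte aligned user area, the next block header
           then begins at +288 and the next user area at +288 + 32 = +320,
           again a multiple of 64. */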
    1357         if (!pExecMemAllocator->cbHeapBlockHdr)
    1358         {
    1359             pExecMemAllocator->cbHeapBlockHdr   = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
    1360             pExecMemAllocator->cbHeapAlignTweak = 64;
    1361             pExecMemAllocator->pvAlignTweak     = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
    1362                                                                     32 /*cbAlignment*/);
    1363             AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
    1364 
    1365             void *pvTest1 = RTHeapSimpleAlloc(hHeap,
    1366                                                 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
    1367                                               - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
    1368             AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
    1369             AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
    1370 
    1371             void *pvTest2 = RTHeapSimpleAlloc(hHeap,
    1372                                                 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
    1373                                               - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
    1374             AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
    1375             AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
    1376 
    1377             RTHeapSimpleFree(hHeap, pvTest2);
    1378             RTHeapSimpleFree(hHeap, pvTest1);
    1379         }
    1380         else
    1381         {
    1382             pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap,  pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
    1383             AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
    1384         }
    1385         if (RT_SUCCESS(rc))
    1386 #endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
    1387         {
    1388             /*
    1389              * Add the chunk.
    1390              *
    1391              * This must be done before the unwind init so windows can allocate
    1392              * memory from the chunk when using the alternative sub-allocator.
    1393              */
    1394             pExecMemAllocator->aChunks[idxChunk].pvChunk      = pvChunk;
    1395 #ifdef IN_RING3
    1396             pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
    1397 #endif
    1398 #ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1399             pExecMemAllocator->aChunks[idxChunk].hHeap        = hHeap;
    1400 #else
    1401             pExecMemAllocator->aChunks[idxChunk].cFreeUnits   = pExecMemAllocator->cUnitsPerChunk;
    1402             pExecMemAllocator->aChunks[idxChunk].idxFreeHint  = 0;
    1403             memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
    1404                    0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
    1405 #endif
    1406 
    1407             pExecMemAllocator->cChunks      = idxChunk + 1;
    1408             pExecMemAllocator->idxChunkHint = idxChunk;
    1409 
    1410 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1411             pExecMemAllocator->cbTotal     += pExecMemAllocator->cbChunk;
    1412             pExecMemAllocator->cbFree      += pExecMemAllocator->cbChunk;
    1413 #else
    1414             size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
    1415             pExecMemAllocator->cbTotal     += cbFree;
    1416             pExecMemAllocator->cbFree      += cbFree;
    1417 #endif
    1418 
    1419 #ifdef IN_RING3
    1420             /*
    1421              * Initialize the unwind information (this cannot really fail atm).
    1422              * (This sets pvUnwindInfo.)
    1423              */
    1424             rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
    1425             if (RT_SUCCESS(rc))
    1426 #endif
    1427             {
    1428                 return VINF_SUCCESS;
    1429             }
    1430 
    1431 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1432             /* Just in case the impossible happens, undo the accounting above: */
    1433             pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
    1434             pExecMemAllocator->cbFree  -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    1435             pExecMemAllocator->cChunks  = idxChunk;
    1436             memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
    1437                    0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
    1438             pExecMemAllocator->aChunks[idxChunk].pvChunk    = NULL;
    1439             pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
    1440 #endif
    1441         }
    1442 #ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1443     }
    1444 #endif
    1445     RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
    1446     RT_NOREF(pVCpu);
    1447     return rc;
    1448 }
    1449 
    1450 
    1451 /**
    1452  * Initializes the executable memory allocator for native recompilation on the
    1453  * calling EMT.
    1454  *
    1455  * @returns VBox status code.
    1456  * @param   pVCpu       The cross context virtual CPU structure of the calling
    1457  *                      thread.
    1458  * @param   cbMax       The max size of the allocator.
    1459  * @param   cbInitial   The initial allocator size.
    1460  * @param   cbChunk     The chunk size, 0 or UINT32_MAX for default (@a cbMax
    1461  *                      dependent).
    1462  */
    1463 int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
    1464 {
    1465     /*
    1466      * Validate input.
    1467      */
    1468     AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
    1469     AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
    1470     AssertLogRelMsgReturn(   cbChunk != UINT32_MAX
    1471                           || cbChunk == 0
    1472                           || (   RT_IS_POWER_OF_TWO(cbChunk)
    1473                               && cbChunk >= _1M
    1474                               && cbChunk <= _256M
    1475                               && cbChunk <= cbMax),
    1476                           ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
    1477                           VERR_OUT_OF_RANGE);
    1478 
    1479     /*
    1480      * Adjust/figure out the chunk size.
    1481      */
    1482     if (cbChunk == 0 || cbChunk == UINT32_MAX)
    1483     {
    1484         if (cbMax >= _256M)
    1485             cbChunk = _64M;
    1486         else
    1487         {
    1488             if (cbMax < _16M)
    1489                 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
    1490             else
    1491                 cbChunk = (uint32_t)cbMax / 4;
    1492             if (!RT_IS_POWER_OF_TWO(cbChunk))
    1493                 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
    1494         }
    1495     }
    1496 
    1497     if (cbChunk > cbMax)
    1498         cbMax = cbChunk;
    1499     else
    1500         cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
    1501     uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
    1502     AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
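    /* Worked example with hypothetical inputs cbMax = 40 MiB, cbChunk = 0:
       40 MiB is neither >= _256M nor < _16M, so cbChunk = 40 MiB / 4 = 10 MiB,
       which isn't a power of two and is rounded up to 16 MiB.  cbMax is then
       rounded up to a whole number of chunks, 48 MiB, giving cMaxChunks = 3. */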
    1503 
    1504     /*
    1505      * Allocate and initialize the allocator instance.
    1506      */
    1507     size_t       cbNeeded   = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
    1508 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1509     size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
    1510     size_t const cbBitmap   = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
    1511     cbNeeded += cbBitmap * cMaxChunks;
    1512     AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
    1513     Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
    1514 #endif
    1515 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    1516     size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
    1517     cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
    1518 #endif
    1519     PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
    1520     AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
    1521                           VERR_NO_MEMORY);
    1522     pExecMemAllocator->uMagic       = IEMEXECMEMALLOCATOR_MAGIC;
    1523     pExecMemAllocator->cbChunk      = cbChunk;
    1524     pExecMemAllocator->cMaxChunks   = cMaxChunks;
    1525     pExecMemAllocator->cChunks      = 0;
    1526     pExecMemAllocator->idxChunkHint = 0;
    1527     pExecMemAllocator->cAllocations = 0;
    1528     pExecMemAllocator->cbTotal      = 0;
    1529     pExecMemAllocator->cbFree       = 0;
    1530     pExecMemAllocator->cbAllocated  = 0;
    1531 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1532     pExecMemAllocator->pbmAlloc                 = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
    1533     pExecMemAllocator->cUnitsPerChunk           = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    1534     pExecMemAllocator->cBitmapElementsPerChunk  = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
    1535     memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
    1536 #endif
    1537 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    1538     pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
    1539 #endif
    1540     for (uint32_t i = 0; i < cMaxChunks; i++)
    1541     {
    1542 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1543         pExecMemAllocator->aChunks[i].cFreeUnits   = 0;
    1544         pExecMemAllocator->aChunks[i].idxFreeHint  = 0;
    1545 #else
    1546         pExecMemAllocator->aChunks[i].hHeap        = NIL_RTHEAPSIMPLE;
    1547 #endif
    1548         pExecMemAllocator->aChunks[i].pvChunk      = NULL;
    1549 #ifdef IN_RING0
    1550         pExecMemAllocator->aChunks[i].hMemObj      = NIL_RTR0MEMOBJ;
    1551 #else
    1552         pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
    1553 #endif
    1554     }
    1555     pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
    1556 
    1557     /*
    1558      * Do the initial allocations.
    1559      */
    1560     while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
    1561     {
    1562         int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
    1563         AssertLogRelRCReturn(rc, rc);
    1564     }
    1565 
    1566     pExecMemAllocator->idxChunkHint = 0;
    1567 
    1568     return VINF_SUCCESS;
    1569 }
    1570 
    1571 
    1572 /*********************************************************************************************************************************
    1573 *   Native Recompilation                                                                                                         *
    1574 *********************************************************************************************************************************/
    1575 
    1576 
    1577 /**
    1578  * Used by TB code when encountering a non-zero status or rcPassUp after a call.
    1579  */
    1580 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
    1581 {
    1582     pVCpu->iem.s.cInstructions += idxInstr;
    1583     return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
    1584 }
    1585 
    1586 
    1587 /**
    1588  * Used by TB code when it wants to raise a \#GP(0).
    1589  */
    1590 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
    1591 {
    1592     iemRaiseGeneralProtectionFault0Jmp(pVCpu);
    1593 #ifndef _MSC_VER
    1594     return VINF_IEM_RAISED_XCPT; /* not reached */
    1595 #endif
    1596 }
    1597 
    1598 
    1599 /**
    1600  * Used by TB code when it wants to raise a \#NM.
    1601  */
    1602 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
    1603 {
    1604     iemRaiseDeviceNotAvailableJmp(pVCpu);
    1605 #ifndef _MSC_VER
    1606     return VINF_IEM_RAISED_XCPT; /* not reached */
    1607 #endif
    1608 }
    1609 
    1610 
    1611 /**
    1612  * Used by TB code when it wants to raise a \#UD.
    1613  */
    1614 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
    1615 {
    1616     iemRaiseUndefinedOpcodeJmp(pVCpu);
    1617 #ifndef _MSC_VER
    1618     return VINF_IEM_RAISED_XCPT; /* not reached */
    1619 #endif
    1620 }
    1621 
    1622 
    1623 /**
    1624  * Used by TB code when detecting opcode changes.
    1625  * @see iemThreadedFuncWorkerObsoleteTb
    1626  */
    1627 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
    1628 {
    1629     /* We set fSafeToFree to false because we're being called in the context
    1630        of a TB callback function, which for native TBs means we cannot release
    1631        the executable memory until we've returned our way back to iemTbExec, as
    1632        that return path goes via the native code generated for the TB. */
    1633     Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
    1634     iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
    1635     return VINF_IEM_REEXEC_BREAK;
    1636 }
    1637 
    1638 
    1639 /**
    1640  * Used by TB code when we need to switch to a TB with CS.LIM checking.
    1641  */
    1642 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
    1643 {
    1644     Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
    1645           pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
    1646           (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
    1647           pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
    1648     STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
    1649     return VINF_IEM_REEXEC_BREAK;
    1650 }
    1651 
    1652 
    1653 /**
    1654  * Used by TB code when we missed a PC check after a branch.
    1655  */
    1656 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
    1657 {
    1658     Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
    1659           pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
    1660           pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
    1661           pVCpu->iem.s.pbInstrBuf));
    1662     STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
    1663     return VINF_IEM_REEXEC_BREAK;
    1664 }
    1665 
    1666 
    1667 
    1668 /*********************************************************************************************************************************
    1669 *   Helpers: Segmented memory fetches and stores.                                                                                *
    1670 *********************************************************************************************************************************/
    1671 
    1672 /**
    1673  * Used by TB code to load unsigned 8-bit data w/ segmentation.
    1674  */
    1675 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1676 {
    1677 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1678     return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1679 #else
    1680     return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1681 #endif
    1682 }
    1683 
    1684 
    1685 /**
    1686  * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
    1687  * to 16 bits.
    1688  */
    1689 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1690 {
    1691 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1692     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1693 #else
    1694     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1695 #endif
    1696 }
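The nested casts above implement sign-extend-then-zero-extend: widen with the
sign to the destination width, then reinterpret as unsigned so the final
64-bit widening is a zero extension. A standalone sketch (hypothetical demo
code, not part of VBox) of what the chain computes for a byte with the sign
bit set:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint8_t  const bFetched = 0x80; /* -128 when reinterpreted as int8_t. */
    uint64_t const uResult  = (uint64_t)(uint16_t)(int16_t)(int8_t)bFetched;
    printf("%#llx\n", (unsigned long long)uResult); /* prints 0xff80 */
    return 0;
}

The same pattern recurs in all the _Sx_ helpers below, only with different
intermediate widths.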
    1697 
    1698 
    1699 /**
    1700  * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
    1701  * to 32 bits.
    1702  */
    1703 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1704 {
    1705 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1706     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1707 #else
    1708     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1709 #endif
    1710 }
    1711 
    1712 /**
    1713  * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
    1714  * to 64 bits.
    1715  */
    1716 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1717 {
    1718 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1719     return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1720 #else
    1721     return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1722 #endif
    1723 }
    1724 
    1725 
    1726 /**
    1727  * Used by TB code to load unsigned 16-bit data w/ segmentation.
    1728  */
    1729 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1730 {
    1731 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1732     return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1733 #else
    1734     return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
    1735 #endif
    1736 }
    1737 
    1738 
    1739 /**
    1740  * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
    1741  * to 32 bits.
    1742  */
    1743 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1744 {
    1745 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1746     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1747 #else
    1748     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
    1749 #endif
    1750 }
    1751 
    1752 
    1753 /**
    1754  * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
    1755  * to 64 bits.
    1756  */
    1757 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1758 {
    1759 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1760     return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1761 #else
    1762     return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
    1763 #endif
    1764 }
    1765 
    1766 
    1767 /**
    1768  * Used by TB code to load unsigned 32-bit data w/ segmentation.
    1769  */
    1770 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1771 {
    1772 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1773     return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1774 #else
    1775     return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
    1776 #endif
    1777 }
    1778 
    1779 
    1780 /**
    1781  * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
    1782  * to 64 bits.
    1783  */
    1784 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1785 {
    1786 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1787     return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1788 #else
    1789     return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
    1790 #endif
    1791 }
    1792 
    1793 
    1794 /**
    1795  * Used by TB code to load unsigned 64-bit data w/ segmentation.
    1796  */
    1797 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1798 {
    1799 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1800     return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1801 #else
    1802     return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
    1803 #endif
    1804 }
    1805 
    1806 
    1807 /**
    1808  * Used by TB code to store unsigned 8-bit data w/ segmentation.
    1809  */
    1810 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
    1811 {
    1812 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    1813     iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
    1814 #else
    1815     iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
    1816 #endif
    1817 }
    1818 
    1819 
    1820 /**
    1821  * Used by TB code to store unsigned 16-bit data w/ segmentation.
    1822  */
    1823 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
    1824 {
    1825 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    1826     iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
    1827 #else
    1828     iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
    1829 #endif
    1830 }
    1831 
    1832 
    1833 /**
    1834  * Used by TB code to store unsigned 32-bit data w/ segmentation.
    1835  */
    1836 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
    1837 {
    1838 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    1839     iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
    1840 #else
    1841     iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
    1842 #endif
    1843 }
    1844 
    1845 
    1846 /**
    1847  * Used by TB code to store unsigned 64-bit data w/ segmentation.
    1848  */
    1849 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
    1850 {
    1851 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    1852     iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
    1853 #else
    1854     iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
    1855 #endif
    1856 }
    1857 
    1858 
    1859 
    1860 /**
    1861  * Used by TB code to store an unsigned 16-bit value onto a generic stack.
    1862  */
    1863 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
    1864 {
    1865 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    1866     iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
    1867 #else
    1868     iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
    1869 #endif
    1870 }
    1871 
    1872 
    1873 /**
    1874  * Used by TB code to store an unsigned 32-bit value onto a generic stack.
    1875  */
    1876 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    1877 {
    1878 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    1879     iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
    1880 #else
    1881     iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
    1882 #endif
    1883 }
    1884 
    1885 
    1886 /**
    1887  * Used by TB code to store a 32-bit selector value onto a generic stack.
    1888  *
    1889  * Intel CPUs don't write a whole dword, hence the special function.
    1890  */
    1891 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    1892 {
    1893 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    1894     iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
    1895 #else
    1896     iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
    1897 #endif
    1898 }
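/* Background for the SReg special case above (Intel SDM, PUSH instruction):
   with a 32-bit operand size, recent Intel CPUs store a segment register
   using a 16-bit move and leave the upper half of the stack slot unmodified,
   so the emulation cannot simply reuse the plain 32-bit store helper. */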
    1899 
    1900 
    1901 /**
    1902  * Used by TB code to push an unsigned 64-bit value onto a generic stack.
    1903  */
    1904 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
    1905 {
    1906 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    1907     iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
    1908 #else
    1909     iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
    1910 #endif
    1911 }
    1912 
    1913 
    1914 /**
    1915  * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
    1916  */
    1917 IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1918 {
    1919 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    1920     return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
    1921 #else
    1922     return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
    1923 #endif
    1924 }
    1925 
    1926 
    1927 /**
    1928  * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
    1929  */
    1930 IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1931 {
    1932 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    1933     return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
    1934 #else
    1935     return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
    1936 #endif
    1937 }
    1938 
    1939 
    1940 /**
    1941  * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
    1942  */
    1943 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1944 {
    1945 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    1946     return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
    1947 #else
    1948     return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
    1949 #endif
    1950 }
    1951 
    1952 
    1953 
    1954 /*********************************************************************************************************************************
    1955 *   Helpers: Flat memory fetches and stores.                                                                                     *
    1956 *********************************************************************************************************************************/
    1957 
    1958 /**
    1959  * Used by TB code to load unsigned 8-bit data w/ flat address.
    1960  * @note Zero extending the value to 64-bit to simplify assembly.
    1961  */
    1962 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1963 {
    1964 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1965     return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    1966 #else
    1967     return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    1968 #endif
    1969 }
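/* Note: in the IEMNATIVE_WITH_TLB_LOOKUP_FETCH configuration the flat helpers
   in this section reuse the segmented iemMem*SafeJmp workers and pass
   UINT8_MAX as iSegReg to indicate that the address is already flat, i.e.
   that no segment base or limit should be applied. */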
    1970 
    1971 
    1972 /**
    1973  * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
    1974  * to 16 bits.
    1975  * @note Zero extending the value to 64-bit to simplify assembly.
    1976  */
    1977 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1978 {
    1979 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1980     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    1981 #else
    1982     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    1983 #endif
    1984 }
    1985 
    1986 
    1987 /**
    1988  * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
    1989  * to 32 bits.
    1990  * @note Zero extending the value to 64-bit to simplify assembly.
    1991  */
    1992 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1993 {
    1994 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1995     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    1996 #else
    1997     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    1998 #endif
    1999 }
    2000 
    2001 
    2002 /**
    2003  * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
    2004  * to 64 bits.
    2005  */
    2006 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2007 {
    2008 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2009     return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2010 #else
    2011     return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    2012 #endif
    2013 }
    2014 
    2015 
    2016 /**
    2017  * Used by TB code to load unsigned 16-bit data w/ flat address.
    2018  * @note Zero extending the value to 64-bit to simplify assembly.
    2019  */
    2020 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2021 {
    2022 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2023     return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2024 #else
    2025     return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
    2026 #endif
    2027 }
    2028 
    2029 
    2030 /**
    2031  * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
    2032  * to 32 bits.
    2033  * @note Zero extending the value to 64-bit to simplify assembly.
    2034  */
    2035 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2036 {
    2037 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2038     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2039 #else
    2040     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
    2041 #endif
    2042 }
    2043 
    2044 
    2045 /**
    2046  * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
    2047  * to 64 bits.
    2048  * @note Zero extending the value to 64-bit to simplify assembly.
    2049  */
    2050 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2051 {
    2052 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2053     return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2054 #else
    2055     return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
    2056 #endif
    2057 }
    2058 
    2059 
    2060 /**
    2061  * Used by TB code to load unsigned 32-bit data w/ flat address.
    2062  * @note Zero extending the value to 64-bit to simplify assembly.
    2063  */
    2064 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2065 {
    2066 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2067     return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2068 #else
    2069     return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
    2070 #endif
    2071 }
    2072 
    2073 
    2074 /**
    2075  * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
    2076  * to 64 bits.
    2077  * @note Zero extending the value to 64-bit to simplify assembly.
    2078  */
    2079 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2080 {
    2081 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2082     return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2083 #else
    2084     return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
    2085 #endif
    2086 }
    2087 
    2088 
    2089 /**
    2090  * Used by TB code to load unsigned 64-bit data w/ flat address.
    2091  */
    2092 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2093 {
    2094 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2095     return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2096 #else
    2097     return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
    2098 #endif
    2099 }
    2100 
    2101 
    2102 /**
    2103  * Used by TB code to store unsigned 8-bit data w/ flat address.
    2104  */
    2105 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
    2106 {
    2107 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2108     iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
    2109 #else
    2110     iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
    2111 #endif
    2112 }
    2113 
    2114 
    2115 /**
    2116  * Used by TB code to store unsigned 16-bit data w/ flat address.
    2117  */
    2118 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
    2119 {
    2120 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2121     iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
    2122 #else
    2123     iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
    2124 #endif
    2125 }
    2126 
    2127 
    2128 /**
    2129  * Used by TB code to store unsigned 32-bit data w/ flat address.
    2130  */
    2131 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    2132 {
    2133 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2134     iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
    2135 #else
    2136     iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
    2137 #endif
    2138 }
    2139 
    2140 
    2141 /**
    2142  * Used by TB code to store unsigned 64-bit data w/ flat address.
    2143  */
    2144 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
    2145 {
    2146 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2147     iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
    2148 #else
    2149     iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
    2150 #endif
    2151 }
    2152 
    2153 
    2154 
    2155 /**
    2156  * Used by TB code to store an unsigned 16-bit value onto a flat stack.
    2157  */
    2158 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
    2159 {
    2160 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2161     iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
    2162 #else
    2163     iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
    2164 #endif
    2165 }
    2166 
    2167 
    2168 /**
    2169  * Used by TB code to store an unsigned 32-bit value onto a flat stack.
    2170  */
    2171 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    2172 {
    2173 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2174     iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
    2175 #else
    2176     iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
    2177 #endif
    2178 }
    2179 
    2180 
    2181 /**
    2182  * Used by TB code to store a segment selector value onto a flat stack.
    2183  *
    2184  * Intel CPUs don't write a whole dword, hence the special function.
    2185  */
    2186 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    2187 {
    2188 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2189     iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
    2190 #else
    2191     iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
    2192 #endif
    2193 }
    2194 
    2195 
    2196 /**
    2197  * Used by TB code to store an unsigned 64-bit value onto a flat stack.
    2198  */
    2199 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
    2200 {
    2201 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2202     iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
    2203 #else
    2204     iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
    2205 #endif
    2206 }
    2207 
    2208 
    2209 /**
    2210  * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
    2211  */
    2212 IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2213 {
    2214 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    2215     return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
    2216 #else
    2217     return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
    2218 #endif
    2219 }
    2220 
    2221 
    2222 /**
    2223  * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
    2224  */
    2225 IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2226 {
    2227 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    2228     return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
    2229 #else
    2230     return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
    2231 #endif
    2232 }
    2233 
    2234 
    2235 /**
    2236  * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
    2237  */
    2238 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2239 {
    2240 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    2241     return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
    2242 #else
    2243     return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
    2244 #endif
    2245 }
    2246 
    2247 
    2248 
    2249 /*********************************************************************************************************************************
    2250 *   Helpers: Segmented memory mapping.                                                                                           *
    2251 *********************************************************************************************************************************/
    2252 
    2253 /**
    2254  * Used by TB code to map unsigned 8-bit data for atomic read-write w/
    2255  * segmentation.
    2256  */
    2257 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2258                                                                    RTGCPTR GCPtrMem, uint8_t iSegReg))
    2259 {
    2260 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2261     return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2262 #else
    2263     return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2264 #endif
    2265 }
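All the mapping helpers in this section share one contract: they return a
host pointer into the guest page plus an opaque token in *pbUnmapInfo that
must later be handed to the matching commit-and-unmap helper. A caller-side
sketch of that pattern (illustrative only: pVCpu and GCPtrMem are assumed to
be in scope, and iemNativeHlpMemCommitAndUnmapAtomic is assumed to be the
matching commit helper defined elsewhere in this file):

    uint8_t  bUnmapInfo = 0;
    uint8_t *pbByte = iemNativeHlpMemMapDataU8Atomic(pVCpu, &bUnmapInfo, GCPtrMem, X86_SREG_DS);
    *pbByte ^= 0x80;                                        /* modify the mapped guest byte */
    iemNativeHlpMemCommitAndUnmapAtomic(pVCpu, bUnmapInfo); /* write back and release */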
    2266 
    2267 
    2268 /**
    2269  * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
    2270  */
    2271 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2272                                                                RTGCPTR GCPtrMem, uint8_t iSegReg))
    2273 {
    2274 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2275     return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2276 #else
    2277     return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2278 #endif
    2279 }
    2280 
    2281 
    2282 /**
    2283  * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
    2284  */
    2285 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2286                                                                RTGCPTR GCPtrMem, uint8_t iSegReg))
    2287 {
    2288 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2289     return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2290 #else
    2291     return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2292 #endif
    2293 }
    2294 
    2295 
    2296 /**
    2297  * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
    2298  */
    2299 IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2300                                                                      RTGCPTR GCPtrMem, uint8_t iSegReg))
    2301 {
    2302 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2303     return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2304 #else
    2305     return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2306 #endif
    2307 }
    2308 
    2309 
    2310 /**
    2311  * Used by TB code to map unsigned 16-bit data for atomic read-write w/
    2312  * segmentation.
    2313  */
    2314 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2315                                                                      RTGCPTR GCPtrMem, uint8_t iSegReg))
    2316 {
    2317 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2318     return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2319 #else
    2320     return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2321 #endif
    2322 }
    2323 
    2324 
    2325 /**
    2326  * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
    2327  */
    2328 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2329                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2330 {
    2331 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2332     return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2333 #else
    2334     return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2335 #endif
    2336 }
    2337 
    2338 
    2339 /**
    2340  * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
    2341  */
    2342 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2343                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2344 {
    2345 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2346     return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2347 #else
    2348     return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2349 #endif
    2350 }
    2351 
    2352 
    2353 /**
    2354  * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
    2355  */
    2356 IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2357                                                                        RTGCPTR GCPtrMem, uint8_t iSegReg))
    2358 {
    2359 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2360     return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2361 #else
    2362     return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2363 #endif
    2364 }
    2365 
    2366 
    2367 /**
    2368  * Used by TB code to map unsigned 32-bit data for atomic read-write w/
    2369  * segmentation.
    2370  */
    2371 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2372                                                                      RTGCPTR GCPtrMem, uint8_t iSegReg))
    2373 {
    2374 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2375     return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2376 #else
    2377     return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2378 #endif
    2379 }
    2380 
    2381 
    2382 /**
    2383  * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
    2384  */
    2385 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2386                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2387 {
    2388 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2389     return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2390 #else
    2391     return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2392 #endif
    2393 }
    2394 
    2395 
    2396 /**
    2397  * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
    2398  */
    2399 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2400                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2401 {
    2402 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2403     return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2404 #else
    2405     return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2406 #endif
    2407 }
    2408 
    2409 
    2410 /**
    2411  * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
    2412  */
    2413 IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2414                                                                        RTGCPTR GCPtrMem, uint8_t iSegReg))
    2415 {
    2416 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2417     return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2418 #else
    2419     return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2420 #endif
    2421 }
    2422 
    2423 
    2424 /**
    2425  * Used by TB code to map unsigned 64-bit data for atomic read-write w/
    2426  * segmentation.
    2427  */
    2428 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2429                                                                      RTGCPTR GCPtrMem, uint8_t iSegReg))
    2430 {
    2431 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2432     return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2433 #else
    2434     return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2435 #endif
    2436 }
    2437 
    2438 
    2439 /**
    2440  * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
    2441  */
    2442 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2443                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2444 {
    2445 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2446     return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2447 #else
    2448     return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2449 #endif
    2450 }
    2451 
    2452 
    2453 /**
    2454  * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
    2455  */
    2456 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2457                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2458 {
    2459 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2460     return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2461 #else
    2462     return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2463 #endif
    2464 }
    2465 
    2466 
    2467 /**
    2468  * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
    2469  */
    2470 IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2471                                                                        RTGCPTR GCPtrMem, uint8_t iSegReg))
    2472 {
    2473 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2474     return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2475 #else
    2476     return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2477 #endif
    2478 }
    2479 
    2480 
    2481 /**
    2482  * Used by TB code to map 80-bit float data writeonly w/ segmentation.
    2483  */
    2484 IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2485                                                                    RTGCPTR GCPtrMem, uint8_t iSegReg))
    2486 {
    2487 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2488     return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2489 #else
    2490     return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2491 #endif
    2492 }
    2493 
    2494 
    2495 /**
    2496  * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
    2497  */
    2498 IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2499                                                                   RTGCPTR GCPtrMem, uint8_t iSegReg))
    2500 {
    2501 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2502     return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2503 #else
    2504     return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2505 #endif
    2506 }
    2507 
    2508 
    2509 /**
    2510  * Used by TB code to map unsigned 128-bit data for atomic read-write w/
    2511  * segmentation.
    2512  */
    2513 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2514                                                                         RTGCPTR GCPtrMem, uint8_t iSegReg))
    2515 {
    2516 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2517     return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2518 #else
    2519     return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2520 #endif
    2521 }
    2522 
    2523 
    2524 /**
    2525  * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
    2526  */
    2527 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2528                                                                     RTGCPTR GCPtrMem, uint8_t iSegReg))
    2529 {
    2530 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2531     return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2532 #else
    2533     return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2534 #endif
    2535 }
    2536 
    2537 
    2538 /**
    2539  * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
    2540  */
    2541 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2542                                                                     RTGCPTR GCPtrMem, uint8_t iSegReg))
    2543 {
    2544 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2545     return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2546 #else
    2547     return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2548 #endif
    2549 }
    2550 
    2551 
    2552 /**
    2553  * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
    2554  */
    2555 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2556                                                                           RTGCPTR GCPtrMem, uint8_t iSegReg))
    2557 {
    2558 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2559     return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2560 #else
    2561     return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2562 #endif
    2563 }
    2564 
    2565 
    2566 /*********************************************************************************************************************************
    2567 *   Helpers: Flat memory mapping.                                                                                                *
    2568 *********************************************************************************************************************************/
    2569 
    2570 /**
    2571  * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
    2572  * address.
    2573  */
    2574 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2575 {
    2576 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2577     return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2578 #else
    2579     return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2580 #endif
    2581 }
    2582 
    2583 
    2584 /**
    2585  * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
    2586  */
    2587 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2588 {
    2589 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2590     return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2591 #else
    2592     return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2593 #endif
    2594 }
    2595 
    2596 
    2597 /**
    2598  * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
    2599  */
    2600 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2601 {
    2602 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2603     return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2604 #else
    2605     return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2606 #endif
    2607 }
    2608 
    2609 
    2610 /**
    2611  * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
    2612  */
    2613 IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2614 {
    2615 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2616     return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2617 #else
    2618     return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2619 #endif
    2620 }
    2621 
    2622 
    2623 /**
    2624  * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
    2625  * address.
    2626  */
    2627 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2628 {
    2629 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2630     return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2631 #else
    2632     return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2633 #endif
    2634 }
    2635 
    2636 
    2637 /**
    2638  * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
    2639  */
    2640 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2641 {
    2642 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2643     return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2644 #else
    2645     return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2646 #endif
    2647 }
    2648 
    2649 
    2650 /**
    2651  * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
    2652  */
    2653 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2654 {
    2655 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2656     return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2657 #else
    2658     return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2659 #endif
    2660 }
    2661 
    2662 
    2663 /**
    2664  * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
    2665  */
    2666 IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2667 {
    2668 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2669     return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2670 #else
    2671     return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2672 #endif
    2673 }
    2674 
    2675 
    2676 /**
    2677  * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
    2678  * address.
    2679  */
    2680 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2681 {
    2682 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2683     return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2684 #else
    2685     return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2686 #endif
    2687 }
    2688 
    2689 
    2690 /**
    2691  * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
    2692  */
    2693 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2694 {
    2695 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2696     return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2697 #else
    2698     return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2699 #endif
    2700 }
    2701 
    2702 
    2703 /**
    2704  * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
    2705  */
    2706 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2707 {
    2708 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2709     return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2710 #else
    2711     return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2712 #endif
    2713 }
    2714 
    2715 
    2716 /**
    2717  * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
    2718  */
    2719 IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2720 {
    2721 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2722     return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2723 #else
    2724     return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2725 #endif
    2726 }
    2727 
    2728 
    2729 /**
    2730  * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
    2731  * address.
    2732  */
    2733 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2734 {
    2735 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2736     return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2737 #else
    2738     return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2739 #endif
    2740 }
    2741 
    2742 
    2743 /**
    2744  * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
    2745  */
    2746 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2747 {
    2748 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2749     return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2750 #else
    2751     return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2752 #endif
    2753 }
    2754 
    2755 
    2756 /**
    2757  * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
    2758  */
    2759 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2760 {
    2761 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2762     return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2763 #else
    2764     return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2765 #endif
    2766 }
    2767 
    2768 
    2769 /**
    2770  * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
    2771  */
    2772 IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2773 {
    2774 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2775     return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2776 #else
    2777     return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2778 #endif
    2779 }
    2780 
    2781 
    2782 /**
    2783  * Used by TB code to map 80-bit float data writeonly w/ flat address.
    2784  */
    2785 IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2786 {
    2787 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2788     return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2789 #else
    2790     return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2791 #endif
    2792 }
    2793 
    2794 
    2795 /**
    2796  * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
    2797  */
    2798 IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2799 {
    2800 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2801     return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2802 #else
    2803     return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2804 #endif
    2805 }
    2806 
    2807 
    2808 /**
    2809  * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
    2810  * address.
    2811  */
    2812 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2813 {
    2814 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2815     return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2816 #else
    2817     return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2818 #endif
    2819 }
    2820 
    2821 
    2822 /**
    2823  * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
    2824  */
    2825 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2826 {
    2827 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2828     return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2829 #else
    2830     return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2831 #endif
    2832 }
    2833 
    2834 
    2835 /**
    2836  * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
    2837  */
    2838 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2839 {
    2840 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2841     return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2842 #else
    2843     return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2844 #endif
    2845 }
    2846 
    2847 
    2848 /**
    2849  * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
    2850  */
    2851 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2852 {
    2853 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2854     return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2855 #else
    2856     return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2857 #endif
    2858 }
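
/*
 * Editor's note (not part of the original source): all of the mapping
 * helpers above follow one pattern.  When IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
 * is defined, the recompiled code presumably performs the TLB lookup inline
 * and only calls these helpers as the fallback, hence the
 * iemMemMapDataXxxSafeJmp variants; otherwise the helper performs the whole
 * job via iemMemMapDataXxxJmp / iemMemFlatMapDataXxxJmp.  The flat helpers
 * pass UINT8_MAX as iSegReg to the segmented Safe variants, which evidently
 * encodes "flat / no segment".
 */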
    2859 
    2860 
    2861 /*********************************************************************************************************************************
    2862 *   Helpers: Commit, rollback & unmap                                                                                            *
    2863 *********************************************************************************************************************************/
    2864 
    2865 /**
    2866  * Used by TB code to commit and unmap an atomic read-write memory mapping.
    2867  */
    2868 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    2869 {
    2870     return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
    2871 }
    2872 
    2873 
    2874 /**
    2875  * Used by TB code to commit and unmap a read-write memory mapping.
    2876  */
    2877 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    2878 {
    2879     return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
    2880 }
    2881 
    2882 
    2883 /**
    2884  * Used by TB code to commit and unmap a write-only memory mapping.
    2885  */
    2886 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    2887 {
    2888     return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
    2889 }
    2890 
    2891 
    2892 /**
    2893  * Used by TB code to commit and unmap a read-only memory mapping.
    2894  */
    2895 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    2896 {
    2897     return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
    2898 }
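
/*
 * Illustrative sketch (editor's addition, not original source): how the
 * mapping and commit helpers above pair up.  The recompiler emits this
 * sequence as native code; the C form below only shows the contract: each
 * successful map is followed by exactly one matching commit-and-unmap, fed
 * the bUnmapInfo byte the map call filled in.  The wrapper function name is
 * made up for the example.
 */
#if 0 /* illustration only */
static void iemNativeExampleIncU32(PVMCPUCC pVCpu, RTGCPTR GCPtrMem)
{
    uint8_t   bUnmapInfo = 0;
    uint32_t *pu32 = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem);
    *pu32 += 1;                                         /* read-modify-write on guest memory */
    iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo); /* commit the write and release the mapping */
}
#endif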
    2899 
    2900 
    2901 /**
    2902  * Reinitializes the native recompiler state.
    2903  *
    2904  * Called before starting a new recompile job.
    2905  */
    2906 static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
    2907 {
    2908     pReNative->cLabels                     = 0;
    2909     pReNative->bmLabelTypes                = 0;
    2910     pReNative->cFixups                     = 0;
    2911 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2912     pReNative->pDbgInfo->cEntries          = 0;
    2913 #endif
    2914     pReNative->pTbOrg                      = pTb;
    2915     pReNative->cCondDepth                  = 0;
    2916     pReNative->uCondSeqNo                  = 0;
    2917     pReNative->uCheckIrqSeqNo              = 0;
    2918     pReNative->uTlbSeqNo                   = 0;
    2919 
    2920     pReNative->Core.bmHstRegs              = IEMNATIVE_REG_FIXED_MASK
    2921 #if IEMNATIVE_HST_GREG_COUNT < 32
    2922                                            | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
    2923 #endif
    2924                                            ;
    2925     pReNative->Core.bmHstRegsWithGstShadow = 0;
    2926     pReNative->Core.bmGstRegShadows        = 0;
    2927     pReNative->Core.bmVars                 = 0;
    2928     pReNative->Core.bmStack                = 0;
    2929     AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
    2930     pReNative->Core.u64ArgVars             = UINT64_MAX;
    2931 
    2932     AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 11);
    2933     pReNative->aidxUniqueLabels[0]         = UINT32_MAX;
    2934     pReNative->aidxUniqueLabels[1]         = UINT32_MAX;
    2935     pReNative->aidxUniqueLabels[2]         = UINT32_MAX;
    2936     pReNative->aidxUniqueLabels[3]         = UINT32_MAX;
    2937     pReNative->aidxUniqueLabels[4]         = UINT32_MAX;
    2938     pReNative->aidxUniqueLabels[5]         = UINT32_MAX;
    2939     pReNative->aidxUniqueLabels[6]         = UINT32_MAX;
    2940     pReNative->aidxUniqueLabels[7]         = UINT32_MAX;
    2941     pReNative->aidxUniqueLabels[8]         = UINT32_MAX;
    2942     pReNative->aidxUniqueLabels[9]         = UINT32_MAX;
    2943     pReNative->aidxUniqueLabels[10]        = UINT32_MAX;
    2944 
    2945     /* Full host register reinit: */
    2946     for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
    2947     {
    2948         pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
    2949         pReNative->Core.aHstRegs[i].enmWhat        = kIemNativeWhat_Invalid;
    2950         pReNative->Core.aHstRegs[i].idxVar         = UINT8_MAX;
    2951     }
    2952 
    2953     uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
    2954                    & ~(  RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
    2955 #ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
    2956                        | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
    2957 #endif
    2958 #ifdef IEMNATIVE_REG_FIXED_TMP0
    2959                        | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
    2960 #endif
    2961                       );
    2962     for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
    2963     {
    2964         fRegs &= ~RT_BIT_32(idxReg);
    2965         pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
    2966     }
    2967 
    2968     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat     = kIemNativeWhat_pVCpuFixed;
    2969 #ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
    2970     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat   = kIemNativeWhat_pCtxFixed;
    2971 #endif
    2972 #ifdef IEMNATIVE_REG_FIXED_TMP0
    2973     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat       = kIemNativeWhat_FixedTmp;
    2974 #endif
    2975     return pReNative;
    2976 }
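
/*
 * Editor's note (worked example, not original source): the Core.bmHstRegs
 * value set above treats two kinds of registers as permanently allocated:
 * the fixed ones (IEMNATIVE_REG_FIXED_MASK) and any register index beyond
 * what the host provides.  E.g. with IEMNATIVE_HST_GREG_COUNT == 16, the
 * term ~(RT_BIT(16) - 1U) evaluates to 0xffff0000, so bits 16..31 of the
 * 32-bit allocation bitmap can never be handed out.
 */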
    2977 
    2978 
    2979 /**
    2980  * Allocates and initializes the native recompiler state.
    2981  *
    2982  * This is called the first time an EMT wants to recompile something.
    2983  *
    2984  * @returns Pointer to the new recompiler state.
    2985  * @param   pVCpu   The cross context virtual CPU structure of the calling
    2986  *                  thread.
    2987  * @param   pTb     The TB that's about to be recompiled.
    2988  * @thread  EMT(pVCpu)
    2989  */
    2990 static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
    2991 {
    2992     VMCPU_ASSERT_EMT(pVCpu);
    2993 
    2994     PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
    2995     AssertReturn(pReNative, NULL);
    2996 
    2997     /*
    2998      * Try to allocate all the buffers and stuff we need.
    2999      */
    3000     pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
    3001     pReNative->paLabels  = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
    3002     pReNative->paFixups  = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
    3003 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3004     pReNative->pDbgInfo  = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
    3005 #endif
    3006     if (RT_LIKELY(   pReNative->pInstrBuf
    3007                   && pReNative->paLabels
    3008                   && pReNative->paFixups)
    3009 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3010         && pReNative->pDbgInfo
    3011 #endif
    3012        )
    3013     {
    3014         /*
    3015          * Set the buffer & array sizes on success.
    3016          */
    3017         pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
    3018         pReNative->cLabelsAlloc   = _8K;
    3019         pReNative->cFixupsAlloc   = _16K;
    3020 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3021         pReNative->cDbgInfoAlloc  = _16K;
    3022 #endif
    3023 
    3024         /* Other constant stuff: */
    3025         pReNative->pVCpu          = pVCpu;
    3026 
    3027         /*
    3028          * Done, just need to save it and reinit it.
    3029          */
    3030         pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
    3031         return iemNativeReInit(pReNative, pTb);
    3032     }
    3033 
    3034     /*
    3035      * Failed. Cleanup and return.
    3036      */
    3037     AssertFailed();
    3038     RTMemFree(pReNative->pInstrBuf);
    3039     RTMemFree(pReNative->paLabels);
    3040     RTMemFree(pReNative->paFixups);
    3041 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3042     RTMemFree(pReNative->pDbgInfo);
    3043 #endif
    3044     RTMemFree(pReNative);
    3045     return NULL;
    3046 }
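
/*
 * Editor's note (not original source): the failure path above frees the
 * buffers unconditionally, which is fine since RTMemFree, like free(),
 * accepts NULL pointers as a no-op.
 */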
    3047 
    3048 
    3049 /**
    3050  * Creates a label.
    3051  *
    3052  * If the label does not yet have a defined position,
    3053  * call iemNativeLabelDefine() later to set it.
    3054  *
    3055  * @returns Label ID. Throws VBox status code on failure, so no need to check
    3056  *          the return value.
    3057  * @param   pReNative   The native recompile state.
    3058  * @param   enmType     The label type.
    3059  * @param   offWhere    The instruction offset of the label.  UINT32_MAX if the
    3060  *                      label is not yet defined (default).
    3061  * @param   uData       Data associated with the label. Only applicable to
    3062  *                      certain types of labels. Default is zero.
    3063  */
    3064 DECL_HIDDEN_THROW(uint32_t)
    3065 iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
    3066                      uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
    3067 {
    3068     Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
    3069 
    3070     /*
    3071      * Locate existing label definition.
    3072      *
    3073      * This is only allowed for forward declarations where offWhere=UINT32_MAX
    3074      * and uData is zero.
    3075      */
    3076     PIEMNATIVELABEL paLabels = pReNative->paLabels;
    3077     uint32_t const  cLabels  = pReNative->cLabels;
    3078     if (   pReNative->bmLabelTypes & RT_BIT_64(enmType)
    3079 #ifndef VBOX_STRICT
    3080         && enmType  <  kIemNativeLabelType_FirstWithMultipleInstances
    3081         && offWhere == UINT32_MAX
    3082         && uData    == 0
    3083 #endif
    3084         )
    3085     {
    3086 #ifndef VBOX_STRICT
    3087         AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
    3088                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    3089         uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
    3090         if (idxLabel < pReNative->cLabels)
    3091             return idxLabel;
    3092 #else
    3093         for (uint32_t i = 0; i < cLabels; i++)
    3094             if (   paLabels[i].enmType == enmType
    3095                 && paLabels[i].uData   == uData)
    3096             {
    3097                 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    3098                 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    3099                 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
    3100                 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
    3101                            IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    3102                 return i;
    3103             }
    3104         AssertStmt(   enmType >= kIemNativeLabelType_FirstWithMultipleInstances
    3105                    || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    3106 #endif
    3107     }
    3108 
    3109     /*
    3110      * Make sure we've got room for another label.
    3111      */
    3112     if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
    3113     { /* likely */ }
    3114     else
    3115     {
    3116         uint32_t cNew = pReNative->cLabelsAlloc;
    3117         AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
    3118         AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
    3119         cNew *= 2;
    3120         AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* the IEMNATIVEFIXUP::idxLabel type restricts this */
    3121         paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
    3122         AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
    3123         pReNative->paLabels     = paLabels;
    3124         pReNative->cLabelsAlloc = cNew;
    3125     }
    3126 
    3127     /*
    3128      * Define a new label.
    3129      */
    3130     paLabels[cLabels].off     = offWhere;
    3131     paLabels[cLabels].enmType = enmType;
    3132     paLabels[cLabels].uData   = uData;
    3133     pReNative->cLabels = cLabels + 1;
    3134 
    3135     Assert((unsigned)enmType < 64);
    3136     pReNative->bmLabelTypes |= RT_BIT_64(enmType);
    3137 
    3138     if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
    3139     {
    3140         Assert(uData == 0);
    3141         pReNative->aidxUniqueLabels[enmType] = cLabels;
    3142     }
    3143 
    3144     if (offWhere != UINT32_MAX)
    3145     {
    3146 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3147         iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
    3148         iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
    3149 #endif
    3150     }
    3151     return cLabels;
    3152 }
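
/*
 * Illustrative sketch (editor's addition): the intended forward-label flow
 * using the APIs in this file.  The offset variables and the label/fixup
 * type values are placeholders for the example; only iemNativeLabelCreate,
 * iemNativeAddFixup and iemNativeLabelDefine are real.
 */
#if 0 /* illustration only */
    /* 1. Create the label with no position yet (offWhere defaults to UINT32_MAX). */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType /* placeholder */);

    /* 2. Emit the branch and record a fixup at its instruction offset. */
    iemNativeAddFixup(pReNative, offBranch /* placeholder */, idxLabel, enmFixupType /* placeholder */);

    /* 3. When the target position becomes known, pin the label down. */
    iemNativeLabelDefine(pReNative, idxLabel, offTarget /* placeholder */);
#endif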
    3153 
    3154 
    3155 /**
    3156  * Defines the location of an existing label.
    3157  *
    3158  * @param   pReNative   The native recompile state.
    3159  * @param   idxLabel    The label to define.
    3160  * @param   offWhere    The position.
    3161  */
    3162 DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
    3163 {
    3164     AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
    3165     PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
    3166     AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
    3167     pLabel->off = offWhere;
    3168 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3169     iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
    3170     iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
    3171 #endif
    3172 }
    3173 
    3174 
    3175 /**
    3176  * Looks up a label.
    3177  *
    3178  * @returns Label ID if found, UINT32_MAX if not.
    3179  */
    3180 static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
    3181                                    uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
    3182 {
    3183     Assert((unsigned)enmType < 64);
    3184     if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
    3185     {
    3186         if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
    3187             return pReNative->aidxUniqueLabels[enmType];
    3188 
    3189         PIEMNATIVELABEL paLabels = pReNative->paLabels;
    3190         uint32_t const  cLabels  = pReNative->cLabels;
    3191         for (uint32_t i = 0; i < cLabels; i++)
    3192             if (   paLabels[i].enmType == enmType
    3193                 && paLabels[i].uData   == uData
    3194                 && (   paLabels[i].off == offWhere
    3195                     || offWhere        == UINT32_MAX
    3196                     || paLabels[i].off == UINT32_MAX))
    3197                 return i;
    3198     }
    3199     return UINT32_MAX;
    3200 }
    3201 
    3202 
    3203 /**
    3204  * Adds a fixup.
    3205  *
    3206  * @throws  VBox status code (int) on failure.
    3207  * @param   pReNative   The native recompile state.
    3208  * @param   offWhere    The instruction offset of the fixup location.
    3209  * @param   idxLabel    The target label ID for the fixup.
    3210  * @param   enmType     The fixup type.
    3211  * @param   offAddend   Fixup addend if applicable to the type. Default is 0.
    3212  */
    3213 DECL_HIDDEN_THROW(void)
    3214 iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
    3215                   IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
    3216 {
    3217     Assert(idxLabel <= UINT16_MAX);
    3218     Assert((unsigned)enmType <= UINT8_MAX);
    3219 
    3220     /*
    3221      * Make sure we've room.
    3222      */
    3223     PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
    3224     uint32_t const  cFixups  = pReNative->cFixups;
    3225     if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
    3226     { /* likely */ }
    3227     else
    3228     {
    3229         uint32_t cNew = pReNative->cFixupsAlloc;
    3230         AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
    3231         AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
    3232         cNew *= 2;
    3233         AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
    3234         paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
    3235         AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
    3236         pReNative->paFixups     = paFixups;
    3237         pReNative->cFixupsAlloc = cNew;
    3238     }
    3239 
    3240     /*
    3241      * Add the fixup.
    3242      */
    3243     paFixups[cFixups].off       = offWhere;
    3244     paFixups[cFixups].idxLabel  = (uint16_t)idxLabel;
    3245     paFixups[cFixups].enmType   = enmType;
    3246     paFixups[cFixups].offAddend = offAddend;
    3247     pReNative->cFixups = cFixups + 1;
    3248 }
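
/*
 * Editor's note (assumption, not stated in this excerpt): the fixups
 * recorded here are presumably resolved in a later pass, once every
 * referenced label has a defined offset, by patching the instruction at
 * paFixups[i].off to target paLabels[idxLabel].off plus offAddend.  This
 * function only does the bookkeeping.
 */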
    3249 
    3250 
    3251 /**
    3252  * Slow code path for iemNativeInstrBufEnsure.
    3253  */
    3254 DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
    3255 {
    3256     /* Double the buffer size till we meet the request. */
    3257     uint32_t cNew = pReNative->cInstrBufAlloc;
    3258     AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
    3259     do
    3260         cNew *= 2;
    3261     while (cNew < off + cInstrReq);
    3262 
    3263     uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
    3264 #ifdef RT_ARCH_ARM64
    3265     uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
    3266 #else
    3267     uint32_t const cbMaxInstrBuf = _2M;
    3268 #endif
    3269     AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
    3270 
    3271     void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
    3272     AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
    3273 
    3274 #ifdef VBOX_STRICT
    3275     pReNative->offInstrBufChecked = off + cInstrReq;
    3276 #endif
    3277     pReNative->cInstrBufAlloc     = cNew;
    3278     return pReNative->pInstrBuf   = (PIEMNATIVEINSTR)pvNew;
    3279 }
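
/*
 * Editor's note (worked example, not original source): growth is geometric.
 * Starting from the initial 64 KiB instruction buffer, cNew doubles until
 * off + cInstrReq fits, and the function longjmps with
 * VERR_IEM_INSTR_BUF_TOO_LARGE once the byte size would exceed 1 MiB on
 * ARM64 (the branch range limit noted above) or 2 MiB elsewhere.
 */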
    3280 
    3281 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3282 
    3283 /**
    3284  * Grows the static debug info array used during recompilation.
    3285  *
    3286  * @returns Pointer to the new debug info block; throws VBox status code on
    3287  *          failure, so no need to check the return value.
    3288  */
    3289 DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
    3290 {
    3291     uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
    3292     AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
    3293     pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
    3294     AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
    3295     pReNative->pDbgInfo      = pDbgInfo;
    3296     pReNative->cDbgInfoAlloc = cNew;
    3297     return pDbgInfo;
    3298 }
    3299 
    3300 
    3301 /**
    3302  * Adds a new, uninitialized debug info entry, returning the pointer to it.
    3303  */
    3304 DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
    3305 {
    3306     if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
    3307     { /* likely */ }
    3308     else
    3309         pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
    3310     return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
    3311 }
    3312 
    3313 
    3314 /**
    3315  * Debug Info: Adds a native offset record, if necessary.
    3316  */
    3317 static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    3318 {
    3319     PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
    3320 
    3321     /*
    3322      * Search backwards to see if we've got a similar record already.
    3323      */
    3324     uint32_t idx     = pDbgInfo->cEntries;
    3325     uint32_t idxStop = idx > 8 ? idx - 8 : 0;
    3326     while (idx-- > idxStop)
    3327         if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
    3328         {
    3329             if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
    3330                 return;
    3331             AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
    3332                        IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
    3333             break;
    3334         }
    3335 
    3336     /*
    3337      * Add it.
    3338      */
    3339     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
    3340     pEntry->NativeOffset.uType     = kIemTbDbgEntryType_NativeOffset;
    3341     pEntry->NativeOffset.offNative = off;
    3342 }
    3343 
    3344 
    3345 /**
    3346  * Debug Info: Record info about a label.
    3347  */
    3348 static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
    3349 {
    3350     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3351     pEntry->Label.uType    = kIemTbDbgEntryType_Label;
    3352     pEntry->Label.uUnused  = 0;
    3353     pEntry->Label.enmLabel = (uint8_t)enmType;
    3354     pEntry->Label.uData    = uData;
    3355 }
    3356 
    3357 
    3358 /**
    3359  * Debug Info: Record info about a threaded call.
    3360  */
    3361 static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
    3362 {
    3363     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3364     pEntry->ThreadedCall.uType       = kIemTbDbgEntryType_ThreadedCall;
    3365     pEntry->ThreadedCall.fRecompiled = fRecompiled;
    3366     pEntry->ThreadedCall.uUnused     = 0;
    3367     pEntry->ThreadedCall.enmCall     = (uint16_t)enmCall;
    3368 }
    3369 
    3370 
    3371 /**
    3372  * Debug Info: Record info about a new guest instruction.
    3373  */
    3374 static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
    3375 {
    3376     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3377     pEntry->GuestInstruction.uType   = kIemTbDbgEntryType_GuestInstruction;
    3378     pEntry->GuestInstruction.uUnused = 0;
    3379     pEntry->GuestInstruction.fExec   = fExec;
    3380 }
    3381 
    3382 
    3383 /**
    3384  * Debug Info: Record info about guest register shadowing.
    3385  */
    3386 static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
    3387                                                  uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
    3388 {
    3389     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3390     pEntry->GuestRegShadowing.uType         = kIemTbDbgEntryType_GuestRegShadowing;
    3391     pEntry->GuestRegShadowing.uUnused       = 0;
    3392     pEntry->GuestRegShadowing.idxGstReg     = enmGstReg;
    3393     pEntry->GuestRegShadowing.idxHstReg     = idxHstReg;
    3394     pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
    3395 }
    3396 
    3397 #endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
    3398 
    3399 
    3400 /*********************************************************************************************************************************
    3401 *   Register Allocator                                                                                                           *
    3402 *********************************************************************************************************************************/
    3403 
    3404 /**
    3405  * Register parameter indexes (indexed by argument number).
    3406  */
    3407 DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
    3408 {
    3409     IEMNATIVE_CALL_ARG0_GREG,
    3410     IEMNATIVE_CALL_ARG1_GREG,
    3411     IEMNATIVE_CALL_ARG2_GREG,
    3412     IEMNATIVE_CALL_ARG3_GREG,
    3413 #if defined(IEMNATIVE_CALL_ARG4_GREG)
    3414     IEMNATIVE_CALL_ARG4_GREG,
    3415 # if defined(IEMNATIVE_CALL_ARG5_GREG)
    3416     IEMNATIVE_CALL_ARG5_GREG,
    3417 #  if defined(IEMNATIVE_CALL_ARG6_GREG)
    3418     IEMNATIVE_CALL_ARG6_GREG,
    3419 #   if defined(IEMNATIVE_CALL_ARG7_GREG)
    3420     IEMNATIVE_CALL_ARG7_GREG,
    3421 #   endif
    3422 #  endif
    3423 # endif
    3424 #endif
    3425 };
    3426 
    3427 /**
    3428  * Call register masks indexed by argument count.
    3429  */
    3430 DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
    3431 {
    3432     0,
    3433     RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
    3434     RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
    3435     RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
    3436       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3437     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
    3438 #if defined(IEMNATIVE_CALL_ARG4_GREG)
    3439       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3440     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
    3441 # if defined(IEMNATIVE_CALL_ARG5_GREG)
    3442       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3443     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
    3444 #  if defined(IEMNATIVE_CALL_ARG6_GREG)
    3445       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3446     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
    3447     | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
    3448 #   if defined(IEMNATIVE_CALL_ARG7_GREG)
    3449       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3450     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
    3451     | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
    3452 #   endif
    3453 #  endif
    3454 # endif
    3455 #endif
    3456 };
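
/*
 * Illustrative sketch (editor's addition): g_afIemNativeCallRegs is indexed
 * by argument count and yields the combined argument register mask, e.g. for
 * a three-argument helper call:
 */
#if 0 /* illustration only */
    uint32_t const fArgRegs = g_afIemNativeCallRegs[3];
    Assert(fArgRegs == (  RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG)
                        | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
                        | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)));
#endif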
    3457 
    3458 #ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    3459 /**
    3460  * BP offset of the stack argument slots.
    3461  *
    3462  * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
    3463  * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
    3464  */
    3465 DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
    3466 {
    3467     IEMNATIVE_FP_OFF_STACK_ARG0,
    3468 # ifdef IEMNATIVE_FP_OFF_STACK_ARG1
    3469     IEMNATIVE_FP_OFF_STACK_ARG1,
    3470 # endif
    3471 # ifdef IEMNATIVE_FP_OFF_STACK_ARG2
    3472     IEMNATIVE_FP_OFF_STACK_ARG2,
    3473 # endif
    3474 # ifdef IEMNATIVE_FP_OFF_STACK_ARG3
    3475     IEMNATIVE_FP_OFF_STACK_ARG3,
    3476 # endif
    3477 };
    3478 AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
    3479 #endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
    3480 
    3481 /**
    3482  * Info about shadowed guest register values.
    3483  * @see IEMNATIVEGSTREG
    3484  */
    3485 static struct
    3486 {
    3487     /** Offset in VMCPU. */
    3488     uint32_t    off;
    3489     /** The field size. */
    3490     uint8_t     cb;
    3491     /** Name (for logging). */
    3492     const char *pszName;
    3493 } const g_aGstShadowInfo[] =
    3494 {
    3495 #define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
    3496     /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */  { CPUMCTX_OFF_AND_SIZE(rax),                "rax", },
    3497     /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */  { CPUMCTX_OFF_AND_SIZE(rcx),                "rcx", },
    3498     /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */  { CPUMCTX_OFF_AND_SIZE(rdx),                "rdx", },
    3499     /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */  { CPUMCTX_OFF_AND_SIZE(rbx),                "rbx", },
    3500     /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */  { CPUMCTX_OFF_AND_SIZE(rsp),                "rsp", },
    3501     /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */  { CPUMCTX_OFF_AND_SIZE(rbp),                "rbp", },
    3502     /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */  { CPUMCTX_OFF_AND_SIZE(rsi),                "rsi", },
    3503     /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */  { CPUMCTX_OFF_AND_SIZE(rdi),                "rdi", },
    3504     /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */  { CPUMCTX_OFF_AND_SIZE(r8),                 "r8", },
    3505     /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */  { CPUMCTX_OFF_AND_SIZE(r9),                 "r9", },
    3506     /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */  { CPUMCTX_OFF_AND_SIZE(r10),                "r10", },
    3507     /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */  { CPUMCTX_OFF_AND_SIZE(r11),                "r11", },
    3508     /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */  { CPUMCTX_OFF_AND_SIZE(r12),                "r12", },
    3509     /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */  { CPUMCTX_OFF_AND_SIZE(r13),                "r13", },
    3510     /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */  { CPUMCTX_OFF_AND_SIZE(r14),                "r14", },
    3511     /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */  { CPUMCTX_OFF_AND_SIZE(r15),                "r15", },
    3512     /* [kIemNativeGstReg_Pc] = */                       { CPUMCTX_OFF_AND_SIZE(rip),                "rip", },
    3513     /* [kIemNativeGstReg_Cr0] = */                      { CPUMCTX_OFF_AND_SIZE(cr0),                "cr0", },
    3514     /* [kIemNativeGstReg_FpuFcw] = */                   { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW),     "fcw", },
    3515     /* [kIemNativeGstReg_FpuFsw] = */                   { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW),     "fsw", },
    3516     /* [kIemNativeGstReg_SegBaseFirst + 0] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base),  "es_base", },
    3517     /* [kIemNativeGstReg_SegBaseFirst + 1] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base),  "cs_base", },
    3518     /* [kIemNativeGstReg_SegBaseFirst + 2] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base),  "ss_base", },
    3519     /* [kIemNativeGstReg_SegBaseFirst + 3] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base),  "ds_base", },
    3520     /* [kIemNativeGstReg_SegBaseFirst + 4] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base),  "fs_base", },
    3521     /* [kIemNativeGstReg_SegBaseFirst + 5] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base),  "gs_base", },
    3522     /* [kIemNativeGstReg_SegAttribFirst + 0] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u),   "es_attrib", },
    3523     /* [kIemNativeGstReg_SegAttribFirst + 1] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u),   "cs_attrib", },
    3524     /* [kIemNativeGstReg_SegAttribFirst + 2] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u),   "ss_attrib", },
    3525     /* [kIemNativeGstReg_SegAttribFirst + 3] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u),   "ds_attrib", },
    3526     /* [kIemNativeGstReg_SegAttribFirst + 4] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u),   "fs_attrib", },
    3527     /* [kIemNativeGstReg_SegAttribFirst + 5] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u),   "gs_attrib", },
    3528     /* [kIemNativeGstReg_SegLimitFirst + 0] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
    3529     /* [kIemNativeGstReg_SegLimitFirst + 1] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
    3530     /* [kIemNativeGstReg_SegLimitFirst + 2] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
    3531     /* [kIemNativeGstReg_SegLimitFirst + 3] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
    3532     /* [kIemNativeGstReg_SegLimitFirst + 4] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
    3533     /* [kIemNativeGstReg_SegLimitFirst + 5] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
    3534     /* [kIemNativeGstReg_SegSelFirst + 0] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel),      "es", },
    3535     /* [kIemNativeGstReg_SegSelFirst + 1] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel),      "cs", },
    3536     /* [kIemNativeGstReg_SegSelFirst + 2] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel),      "ss", },
    3537     /* [kIemNativeGstReg_SegSelFirst + 3] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel),      "ds", },
    3538     /* [kIemNativeGstReg_SegSelFirst + 4] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel),      "fs", },
    3539     /* [kIemNativeGstReg_SegSelFirst + 5] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel),      "gs", },
    3540     /* [kIemNativeGstReg_Cr4] = */                      { CPUMCTX_OFF_AND_SIZE(cr4),                "cr4", },
    3541     /* [kIemNativeGstReg_EFlags] = */                   { CPUMCTX_OFF_AND_SIZE(eflags),             "eflags", },
    3542 #undef CPUMCTX_OFF_AND_SIZE
    3543 };
    3544 AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
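
/*
 * Illustrative sketch (editor's addition): how the shadow info table is
 * meant to be consumed.  Given a guest register enum value, it supplies the
 * VMCPU byte offset, field width and log name for emitting loads/stores of
 * the shadow copy.
 */
#if 0 /* illustration only */
    uint32_t const offVCpu = g_aGstShadowInfo[kIemNativeGstReg_Pc].off; /* RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip) */
    uint8_t const  cbField = g_aGstShadowInfo[kIemNativeGstReg_Pc].cb;  /* sizeof(uint64_t) for rip */
    Log12(("loading %s: %u bytes at VMCPU+%#x\n", g_aGstShadowInfo[kIemNativeGstReg_Pc].pszName, cbField, offVCpu));
#endif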
    3545 
    3546 
    3547 /** Host CPU general purpose register names. */
    3548 DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
    3549 {
    3550 #ifdef RT_ARCH_AMD64
    3551     "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
    3552 #elif defined(RT_ARCH_ARM64)
    3553     "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",  "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
    3554     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp",  "lr",  "sp/xzr",
    3555 #else
    3556 # error "port me"
    3557 #endif
    3558 };
    3559 
    3560 
    3561 DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
    3562                                                      IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
    3563 {
    3564     pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
    3565 
    3566     pReNative->Core.aHstRegs[idxReg].enmWhat        = enmWhat;
    3567     pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3568     pReNative->Core.aHstRegs[idxReg].idxVar         = idxVar;
    3569     return (uint8_t)idxReg;
    3570 }
    3571 
    3572 
    3573 #if 0 /* unused */
    3574 /**
    3575  * Tries to locate a suitable register in the given register mask.
    3576  *
    3577  * This ASSUMES the caller has done the minimal/optimal allocation checks and
    3578  * failed.
    3579  *
    3580  * @returns Host register number on success, returns UINT8_MAX on failure.
    3581  */
    3582 static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
    3583 {
    3584     Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
    3585     uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
    3586     if (fRegs)
    3587     {
    3588         /** @todo pick better here:    */
    3589         unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
    3590 
    3591         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
    3592         Assert(   (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
    3593                == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    3594         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    3595 
    3596         pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    3597         pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    3598         pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3599         return idxReg;
    3600     }
    3601     return UINT8_MAX;
    3602 }
    3603 #endif /* unused */
    3604 
    3605 
    3606 /**
    3607  * Locate a register, possibly freeing one up.
    3608  *
    3609  * This ASSUMES the caller has done the minimal/optimal allocation checks and
    3610  * failed.
    3611  *
    3612  * @returns Host register number on success. Returns UINT8_MAX if no register
    3613  *          was found; the caller is expected to deal with this and raise an
    3614  *          allocation type specific status code (if desired).
    3615  *
    3616  * @throws  VBox status code if we run into trouble spilling a variable or
    3617  *          recording debug info.  Does NOT throw anything if we're out of
    3618  *          registers, though.
    3619  */
    3620 static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
    3621                                          uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
    3622 {
    3623     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
    3624     Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
    3625     Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
    3626 
    3627     /*
    3628      * Try a freed register that's shadowing a guest register.
    3629      */
    3630     uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
    3631     if (fRegs)
    3632     {
    3633         STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
    3634 
    3635 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    3636         /*
    3637          * When we have liveness information, we use it to kick out all shadowed
    3638          * guest registers that will not be needed any more in this TB.  If we're
    3639          * lucky, this may prevent us from ending up here again.
    3640          *
    3641          * Note! We must consider the previous entry here so we don't free
    3642          *       anything that the current threaded function requires (current
    3643          *       entry is produced by the next threaded function).
    3644          */
    3645         uint32_t const idxCurCall = pReNative->idxCurCall;
    3646         if (idxCurCall > 0)
    3647         {
    3648             PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
    3649 
    3650 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    3651             /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
    3652             AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
    3653             uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state. */
    3654 # else
    3655             /* Construct a mask of the registers not in the read or write state.
    3656                Note! We could skip writes, if they aren't from us, as this is just
    3657                      a hack to prevent trashing registers that have just been written
    3658                      or will be written when we retire the current instruction. */
    3659             uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
    3660                                  & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
    3661                                  & IEMLIVENESSBIT_MASK;
    3662 # endif
    3663             /* Merge EFLAGS. */
    3664             uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3);   /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
    3665             fTmp &= fTmp >> 2;                                  /*         CF3,Other3 = AF2,PF2 & CF2,Other2  */
    3666             fTmp &= fTmp >> 1;                                  /*             Other4 = CF3 & Other3 */
    3667             fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
    3668             fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
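            /* Informal worked example (annotation, not from the source): with the
               seven liveness bits laid out as Other,CF,PF,AF,ZF,SF,OF from bit
               kIemNativeGstReg_EFlags upwards (per the step comments above), the
               three shift+AND steps fold all seven bits into the EFlags position,
               so EFLAGS is only marked freeable when every sub-flag is. */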
    3669 
    3670             /* If it matches any shadowed registers. */
    3671             if (pReNative->Core.bmGstRegShadows & fToFreeMask)
    3672             {
    3673                 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
    3674                 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
    3675                 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
    3676 
    3677                 /* See if we've got any unshadowed registers we can return now. */
    3678                 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
    3679                 if (fUnshadowedRegs)
    3680                 {
    3681                     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
    3682                     return (fPreferVolatile
    3683                             ? ASMBitFirstSetU32(fUnshadowedRegs)
    3684                             : ASMBitLastSetU32(  fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3685                                                ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
    3686                          - 1;
    3687                 }
    3688             }
    3689         }
    3690 #endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
    3691 
    3692         unsigned const idxReg = (fPreferVolatile
    3693                                  ? ASMBitFirstSetU32(fRegs)
    3694                                  : ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3695                                                     ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
    3696                               - 1;
    3697 
    3698         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
    3699         Assert(   (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
    3700                == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    3701         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    3702 
    3703         pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    3704         pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    3705         pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3706         return idxReg;
    3707     }
    3708 
    3709     /*
    3710      * Try free up a variable that's in a register.
    3711      *
    3712      * We do two rounds here: first we evacuate variables that don't need to be
    3713      * saved on the stack, then in the second round we move things to the stack.
    3714      */
    3715     STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
    3716     for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
    3717     {
    3718         uint32_t fVars = pReNative->Core.bmVars;
    3719         while (fVars)
    3720         {
    3721             uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
    3722             uint8_t const  idxReg = pReNative->Core.aVars[idxVar].idxReg;
    3723             if (   idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
    3724                 && (RT_BIT_32(idxReg) & fRegMask)
    3725                 && (  iLoop == 0
    3726                     ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
    3727                     : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    3728                 && !pReNative->Core.aVars[idxVar].fRegAcquired)
    3729             {
    3730                 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
    3731                 Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
    3732                        == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    3733                 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3734                 Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
    3735                        == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
    3736 
    3737                 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    3738                 {
    3739                     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
    3740                     *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
    3741                 }
    3742 
    3743                 pReNative->Core.aVars[idxVar].idxReg    = UINT8_MAX;
    3744                 pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxReg);
    3745 
    3746                 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    3747                 pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    3748                 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3749                 return idxReg;
    3750             }
    3751             fVars &= ~RT_BIT_32(idxVar);
    3752         }
    3753     }
    3754 
    3755     return UINT8_MAX;
    3756 }
    3757 
    3758 
    3759 /**
    3760  * Reassigns a variable to a different register specified by the caller.
    3761  *
    3762  * @returns The new code buffer position.
    3763  * @param   pReNative       The native recompile state.
    3764  * @param   off             The current code buffer position.
    3765  * @param   idxVar          The variable index.
    3766  * @param   idxRegOld       The old host register number.
    3767  * @param   idxRegNew       The new host register number.
    3768  * @param   pszCaller       The caller for logging.
    3769  */
    3770 static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
    3771                                     uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
    3772 {
    3773     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    3774     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
    3775     RT_NOREF(pszCaller);
    3776 
    3777     iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
    3778 
    3779     uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
    3780     Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
    3781            pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
    3782     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
    3783 
    3784     pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
    3785     pReNative->Core.aHstRegs[idxRegNew].enmWhat        = kIemNativeWhat_Var;
    3786     pReNative->Core.aHstRegs[idxRegNew].idxVar         = idxVar;
    3787     if (fGstRegShadows)
    3788     {
    3789         pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
    3790                                                | RT_BIT_32(idxRegNew);
    3791         while (fGstRegShadows)
    3792         {
    3793             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    3794             fGstRegShadows &= ~RT_BIT_64(idxGstReg);
    3795 
    3796             Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
    3797             pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
    3798         }
    3799     }
    3800 
    3801     pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
    3802     pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
    3803     pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
    3804     return off;
    3805 }
    3806 
    3807 
    3808 /**
    3809  * Moves a variable to a different register or spills it onto the stack.
    3810  *
    3811  * This must be a stack variable (kIemNativeVarKind_Stack) because the other
    3812  * kinds can easily be recreated if needed later.
    3813  *
    3814  * @returns The new code buffer position.
    3815  * @param   pReNative       The native recompile state.
    3816  * @param   off             The current code buffer position.
    3817  * @param   idxVar          The variable index.
    3818  * @param   fForbiddenRegs  Mask of the forbidden registers.  Defaults to
    3819  *                          call-volatile registers.
    3820  */
    3821 static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
    3822                                                 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
    3823 {
    3824     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    3825     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    3826     Assert(pVar->enmKind == kIemNativeVarKind_Stack);
    3827     Assert(!pVar->fRegAcquired);
    3828 
    3829     uint8_t const idxRegOld = pVar->idxReg;
    3830     Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
    3831     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
    3832     Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
    3833     Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
    3834            == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
    3835     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3836     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
    3837            == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
    3838 
    3839 
    3840     /** @todo Add statistics on this.*/
    3841     /** @todo Implement basic variable liveness analysis (python) so variables
    3842      * can be freed immediately once no longer used.  As it stands, we risk
    3843      * trashing registers and stack for dead variables.
    3844      * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
    3845 
    3846     /*
    3847      * First try move it to a different register, as that's cheaper.
    3848      */
    3849     fForbiddenRegs |= RT_BIT_32(idxRegOld);
    3850     fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
    3851     uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
    3852     if (fRegs)
    3853     {
    3854         /* Avoid using shadow registers, if possible. */
    3855         if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
    3856             fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
    3857         unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
    3858         return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
    3859     }
    3860 
    3861     /*
    3862      * Otherwise we must spill the register onto the stack.
    3863      */
    3864     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    3865     Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
    3866            idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    3867     off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
    3868 
    3869     pVar->idxReg                            = UINT8_MAX;
    3870     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
    3871     pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxRegOld);
    3872     pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
    3873     pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
    3874     return off;
    3875 }
    3876 
    3877 
    3878 /**
    3879  * Allocates a temporary host general purpose register.
    3880  *
    3881  * This may emit code to save register content onto the stack in order to free
    3882  * up a register.
    3883  *
    3884  * @returns The host register number; throws VBox status code on failure,
    3885  *          so no need to check the return value.
    3886  * @param   pReNative       The native recompile state.
    3887  * @param   poff            Pointer to the variable with the code buffer position.
    3888  *                          This will be updated if we need to move a variable from
    3889  *                          register to stack in order to satisfy the request.
    3890  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    3891  *                          registers (@c true, default) or the other way around
    3892  *                          (@c false, for iemNativeRegAllocTmpForGuestReg()).
    3893  */
    3894 DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
    3895 {
    3896     /*
    3897      * Try to find a completely unused register, preferably a call-volatile one.
    3898      */
    3899     uint8_t  idxReg;
    3900     uint32_t fRegs = ~pReNative->Core.bmHstRegs
    3901                    & ~pReNative->Core.bmHstRegsWithGstShadow
    3902                    & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
    3903     if (fRegs)
    3904     {
    3905         if (fPreferVolatile)
    3906             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3907                                                 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    3908         else
    3909             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3910                                                 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    3911         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    3912         Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    3913         Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
    3914     }
    3915     else
    3916     {
    3917         idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
    3918         AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
    3919         Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
    3920     }
    3921     return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
    3922 }
    3923 
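#if 0 /* Usage sketch only -- annotation, not part of the changeset. */
/**
 * Hedged illustration of the allocate/use/free pattern for temporary
 * registers; the immediate value and the emitter in the middle are arbitrary.
 */
static uint32_t iemNativeSketchTmpRegUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);            /* may emit spill code, updating off */
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x42)); /* arbitrary scratch use */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);                                  /* does not flush guest shadows */
    return off;
}
#endif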
    3924 
    3925 /**
    3926  * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
    3927  * registers.
    3928  *
    3929  * @returns The host register number; throws VBox status code on failure,
    3930  *          so no need to check the return value.
    3931  * @param   pReNative       The native recompile state.
    3932  * @param   poff            Pointer to the variable with the code buffer position.
    3933  *                          This will be updated if we need to move a variable from
    3934  *                          register to stack in order to satisfy the request.
    3935  * @param   fRegMask        Mask of acceptable registers.
    3936  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    3937  *                          registers (@c true, default) or the other way around
    3938  *                          (@c false, for iemNativeRegAllocTmpForGuestReg()).
    3939  */
    3940 DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
    3941                                                   bool fPreferVolatile /*= true*/)
    3942 {
    3943     Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
    3944     Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
    3945 
    3946     /*
    3947      * Try to find a completely unused register, preferably a call-volatile one.
    3948      */
    3949     uint8_t  idxReg;
    3950     uint32_t fRegs = ~pReNative->Core.bmHstRegs
    3951                    & ~pReNative->Core.bmHstRegsWithGstShadow
    3952                    & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
    3953                    & fRegMask;
    3954     if (fRegs)
    3955     {
    3956         if (fPreferVolatile)
    3957             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3958                                                 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    3959         else
    3960             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3961                                                 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    3962         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    3963         Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    3964         Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
    3965     }
    3966     else
    3967     {
    3968         idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
    3969         AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
    3970         Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
    3971     }
    3972     return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
    3973 }
    3974 
    3975 
    3976 /**
    3977  * Allocates a temporary register for loading an immediate value into.
    3978  *
    3979  * This will emit code to load the immediate, unless there happens to be an
    3980  * unused register with the value already loaded.
    3981  *
    3982  * The caller will not modify the returned register, it must be considered
    3983  * read-only.  Free using iemNativeRegFreeTmpImm.
    3984  *
    3985  * @returns The host register number; throws VBox status code on failure, so no
    3986  *          need to check the return value.
    3987  * @param   pReNative       The native recompile state.
    3988  * @param   poff            Pointer to the variable with the code buffer position.
    3989  * @param   uImm            The immediate value that the register must hold upon
    3990  *                          return.
    3991  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    3992  *                          registers (@c true, default) or the other way around
    3993  *                          (@c false).
    3994  *
    3995  * @note    Reusing immediate values has not been implemented yet.
    3996  */
    3997 DECL_HIDDEN_THROW(uint8_t)
    3998 iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
    3999 {
    4000     uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
    4001     *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
    4002     return idxReg;
    4003 }
    4004 
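#if 0 /* Usage sketch only -- annotation, not part of the changeset. */
/**
 * Hedged illustration of the read-only immediate pattern; the mask value is
 * arbitrary and the register must not be modified before it is freed.
 */
static uint32_t iemNativeSketchTmpImmUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff)); /* emits the load */
    /* ... emit code that only reads idxImmReg ... */
    iemNativeRegFreeTmpImm(pReNative, idxImmReg);   /* assumes the value was left unchanged */
    return off;
}
#endif
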
    4005 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    4006 
    4007 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    4008 /**
    4009  * Helper for iemNativeLivenessGetStateByGstReg.
    4010  *
    4011  * @returns IEMLIVENESS_STATE_XXX
    4012  * @param   fMergedStateExp2    This is the RT_BIT_32() of each sub-state
    4013  *                              ORed together.
    4014  */
    4015 DECL_FORCE_INLINE(uint32_t)
    4016 iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
    4017 {
    4018     /* INPUT trumps anything else. */
    4019     if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
    4020         return IEMLIVENESS_STATE_INPUT;
    4021 
    4022     /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
    4023     if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
    4024     {
    4025         /* If not all sub-fields are clobbered they must be considered INPUT. */
    4026         if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
    4027             return IEMLIVENESS_STATE_INPUT;
    4028         return IEMLIVENESS_STATE_CLOBBERED;
    4029     }
    4030 
    4031     /* XCPT_OR_CALL trumps UNUSED. */
    4032     if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
    4033         return IEMLIVENESS_STATE_XCPT_OR_CALL;
    4034 
    4035     return IEMLIVENESS_STATE_UNUSED;
    4036 }
    4037 # endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
    4038 
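/* Informal example (annotation, not from the source): if the CF sub-state is
   INPUT while the other flags are CLOBBERED, the merged result is INPUT -- a
   read of any one flag forces all of EFLAGS to be kept as an input. */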
    4039 
    4040 DECL_FORCE_INLINE(uint32_t)
    4041 iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
    4042 {
    4043 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    4044     return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
    4045          | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
    4046 # else
    4047     return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx)       & 1)
    4048          | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
    4049          | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
    4050          | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 2) & 8);
    4051 # endif
    4052 }
    4053 
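/* Informal example (annotation, not from the source): in the compact layout a
   register with Bit0=1/Bit1=0 decodes to 1 (IEMLIVENESS_STATE_UNUSED), while
   Bit0=0/Bit1=1 decodes to 2 (IEMLIVENESS_STATE_XCPT_OR_CALL), matching the
   AssertCompile in iemNativeRegAllocFindFree above. */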
    4054 
    4055 DECL_FORCE_INLINE(uint32_t)
    4056 iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
    4057 {
    4058     uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
    4059     if (enmGstReg == kIemNativeGstReg_EFlags)
    4060     {
    4061         /* Merge the eflags states to one. */
    4062 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    4063         uRet  = RT_BIT_32(uRet);
    4064         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
    4065         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
    4066         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
    4067         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
    4068         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
    4069         uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
    4070         uRet  = iemNativeLivenessMergeExpandedEFlagsState(uRet);
    4071 # else
    4072         AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
    4073         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
    4074         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
    4075         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
    4076         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
    4077         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
    4078         uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
    4079 # endif
    4080     }
    4081     return uRet;
    4082 }
    4083 
    4084 
    4085 # ifdef VBOX_STRICT
    4086 /** For assertions only; the caller must check that idxCurCall isn't zero. */
    4087 DECL_FORCE_INLINE(uint32_t)
    4088 iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
    4089 {
    4090     return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
    4091 }
    4092 # endif /* VBOX_STRICT */
    4093 
    4094 #endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
    4095 
    4096 /**
    4097  * Marks host register @a idxHstReg as containing a shadow copy of guest
    4098  * register @a enmGstReg.
    4099  *
    4100  * ASSUMES that caller has made sure @a enmGstReg is not associated with any
    4101  * host register before calling.
    4102  */
    4103 DECL_FORCE_INLINE(void)
    4104 iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
    4105 {
    4106     Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
    4107     Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    4108     Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
    4109 
    4110     pReNative->Core.aidxGstRegShadows[enmGstReg]       = idxHstReg;
    4111     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
    4112     pReNative->Core.bmGstRegShadows                   |= RT_BIT_64(enmGstReg);
    4113     pReNative->Core.bmHstRegsWithGstShadow            |= RT_BIT_32(idxHstReg);
    4114 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4115     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    4116     iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
    4117 #else
    4118     RT_NOREF(off);
    4119 #endif
    4120 }
    4121 
    4122 
    4123 /**
    4124  * Clear any guest register shadow claims from @a idxHstReg.
    4125  *
    4126  * The register does not need to be shadowing any guest registers.
    4127  */
    4128 DECL_FORCE_INLINE(void)
    4129 iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
    4130 {
    4131     Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    4132               == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
    4133            && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4134     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
    4135            == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    4136 
    4137 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4138     uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4139     if (fGstRegs)
    4140     {
    4141         Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
    4142         iemNativeDbgInfoAddNativeOffset(pReNative, off);
    4143         while (fGstRegs)
    4144         {
    4145             unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    4146             fGstRegs &= ~RT_BIT_64(iGstReg);
    4147             iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
    4148         }
    4149     }
    4150 #else
    4151     RT_NOREF(off);
    4152 #endif
    4153 
    4154     pReNative->Core.bmHstRegsWithGstShadow            &= ~RT_BIT_32(idxHstReg);
    4155     pReNative->Core.bmGstRegShadows                   &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4156     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4157 }
    4158 
    4159 
    4160 /**
    4161  * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
    4162  * and global overview flags.
    4163  */
    4164 DECL_FORCE_INLINE(void)
    4165 iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
    4166 {
    4167     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4168     Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    4169               == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
    4170            && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4171     Assert(pReNative->Core.bmGstRegShadows                    & RT_BIT_64(enmGstReg));
    4172     Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
    4173     Assert(pReNative->Core.bmHstRegsWithGstShadow             & RT_BIT_32(idxHstReg));
    4174 
    4175 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4176     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    4177     iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
    4178 #else
    4179     RT_NOREF(off);
    4180 #endif
    4181 
    4182     uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
    4183     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
    4184     if (!fGstRegShadowsNew)
    4185         pReNative->Core.bmHstRegsWithGstShadow        &= ~RT_BIT_32(idxHstReg);
    4186     pReNative->Core.bmGstRegShadows                   &= ~RT_BIT_64(enmGstReg);
    4187 }
    4188 
    4189 
    4190 #if 0 /* unused */
    4191 /**
    4192  * Clear any guest register shadow claim for @a enmGstReg.
    4193  */
    4194 DECL_FORCE_INLINE(void)
    4195 iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
    4196 {
    4197     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4198     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    4199     {
    4200         Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4201         iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
    4202     }
    4203 }
    4204 #endif
    4205 
    4206 
    4207 /**
    4208  * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
    4209  * as the new shadow of it.
    4210  *
    4211  * Unlike the other guest reg shadow helpers, this does the logging for you.
    4212  * However, the liveness state is not asserted here; the caller must do
    4213  * that.
    4214  */
    4215 DECL_FORCE_INLINE(void)
    4216 iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
    4217                                        IEMNATIVEGSTREG enmGstReg, uint32_t off)
    4218 {
    4219     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4220     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    4221     {
    4222         uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
    4223         Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4224         if (idxHstRegOld == idxHstRegNew)
    4225             return;
    4226         Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
    4227                g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
    4228         iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
    4229     }
    4230     else
    4231         Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
    4232                g_aGstShadowInfo[enmGstReg].pszName));
    4233     iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
    4234 }
    4235 
    4236 
    4237 /**
    4238  * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
    4239  * to @a idxRegTo.
    4240  */
    4241 DECL_FORCE_INLINE(void)
    4242 iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
    4243                                     IEMNATIVEGSTREG enmGstReg, uint32_t off)
    4244 {
    4245     Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
    4246     Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
    4247     Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
    4248               == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
    4249            && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4250     Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
    4251            == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
    4252     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
    4253            == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
    4254 
    4255     uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
    4256     pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows  = fGstRegShadowsFrom;
    4257     if (!fGstRegShadowsFrom)
    4258         pReNative->Core.bmHstRegsWithGstShadow          &= ~RT_BIT_32(idxRegFrom);
    4259     pReNative->Core.bmHstRegsWithGstShadow              |= RT_BIT_32(idxRegTo);
    4260     pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows   |= RT_BIT_64(enmGstReg);
    4261     pReNative->Core.aidxGstRegShadows[enmGstReg]         = idxRegTo;
    4262 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4263     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    4264     iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
    4265 #else
    4266     RT_NOREF(off);
    4267 #endif
    4268 }
    4269 
    4270 
    4271 /**
    4272  * Allocates a temporary host general purpose register for keeping a guest
    4273  * register value.
    4274  *
    4275  * Since we may already have a register holding the guest register value,
    4276  * code will be emitted to do the loading if that's not the case. Code may also
    4277  * be emitted if we have to free up a register to satisfy the request.
    4278  *
    4279  * @returns The host register number; throws VBox status code on failure, so no
    4280  *          need to check the return value.
    4281  * @param   pReNative       The native recompile state.
    4282  * @param   poff            Pointer to the variable with the code buffer
    4283  *                          position. This will be updated if we need to move a
    4284  *                          variable from register to stack in order to satisfy
    4285  *                          the request.
    4286  * @param   enmGstReg       The guest register that is to be updated.
    4287  * @param   enmIntendedUse  How the caller will be using the host register.
    4288  * @param   fNoVolatileRegs Set if no volatile register allowed, clear if any
    4289  *                          register is okay (default).  The ASSUMPTION here is
    4290  *                          that the caller has already flushed all volatile
    4291  *                          registers, so this is only applied if we allocate a
    4292  *                          new register.
    4293  * @param   fSkipLivenessAssert     Hack for liveness input validation of EFLAGS.
    4294  * @sa      iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
    4295  */
    4296 DECL_HIDDEN_THROW(uint8_t)
    4297 iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
    4298                                 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
    4299                                 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
    4300 {
    4301     Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
    4302 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    4303     AssertMsg(   fSkipLivenessAssert
    4304               || pReNative->idxCurCall == 0
    4305               || enmGstReg == kIemNativeGstReg_Pc
    4306               || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
    4307                   ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
    4308                   : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
    4309                   ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
    4310                   : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(  iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
    4311               ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
    4312 #endif
    4313     RT_NOREF(fSkipLivenessAssert);
    4314 #if defined(LOG_ENABLED) || defined(VBOX_STRICT)
    4315     static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
    4316 #endif
    4317     uint32_t const fRegMask = !fNoVolatileRegs
    4318                             ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
    4319                             : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    4320 
    4321     /*
    4322      * First check if the guest register value is already in a host register.
    4323      */
    4324     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    4325     {
    4326         uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
    4327         Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4328         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
    4329         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    4330 
    4331         /* It's not supposed to be allocated... */
    4332         if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
    4333         {
    4334             /*
    4335              * If the register will trash the guest shadow copy, try find a
    4336              * completely unused register we can use instead.  If that fails,
    4337              * we need to disassociate the host reg from the guest reg.
    4338              */
    4339             /** @todo would be nice to know if preserving the register is in any way helpful. */
    4340             /* If the purpose is calculations, try to duplicate the register value as
    4341                we'll be clobbering the shadow. */
    4342             if (   enmIntendedUse == kIemNativeGstRegUse_Calculation
    4343                 && (  ~pReNative->Core.bmHstRegs
    4344                     & ~pReNative->Core.bmHstRegsWithGstShadow
    4345                     & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
    4346             {
    4347                 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
    4348 
    4349                 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
    4350 
    4351                 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
    4352                        g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4353                        g_apszIemNativeHstRegNames[idxRegNew]));
    4354                 idxReg = idxRegNew;
    4355             }
    4356             /* If the current register matches the restrictions, go ahead and allocate
    4357                it for the caller. */
    4358             else if (fRegMask & RT_BIT_32(idxReg))
    4359             {
    4360                 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
    4361                 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
    4362                 pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    4363                 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    4364                     Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
    4365                            g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    4366                 else
    4367                 {
    4368                     iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    4369                     Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
    4370                            g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
    4371                 }
    4372             }
    4373             /* Otherwise, allocate a register that satisfies the caller and transfer
    4374                the shadowing if compatible with the intended use.  (This basically
    4375                means the caller wants a non-volatile register (RSP push/pop scenario).) */
    4376             else
    4377             {
    4378                 Assert(fNoVolatileRegs);
    4379                 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
    4380                                                                     !fNoVolatileRegs
    4381                                                                  && enmIntendedUse == kIemNativeGstRegUse_Calculation);
    4382                 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
    4383                 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    4384                 {
    4385                     iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
    4386                     Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
    4387                            g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
    4388                            g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    4389                 }
    4390                 else
    4391                     Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
    4392                            g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4393                            g_apszIemNativeHstRegNames[idxRegNew]));
    4394                 idxReg = idxRegNew;
    4395             }
    4396         }
    4397         else
    4398         {
    4399             /*
    4400              * Oops. Shadowed guest register already allocated!
    4401              *
    4402              * Allocate a new register, copy the value and, if updating, the
    4403              * guest shadow copy assignment to the new register.
    4404              */
    4405             AssertMsg(   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
    4406                       && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
    4407                       ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
    4408                        idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
    4409 
    4410             /** @todo share register for readonly access. */
    4411             uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
    4412                                                              enmIntendedUse == kIemNativeGstRegUse_Calculation);
    4413 
    4414             if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    4415                 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
    4416 
    4417             if (   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
    4418                 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    4419                 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
    4420                        g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4421                        g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
    4422             else
    4423             {
    4424                 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
    4425                 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
    4426                        g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4427                        g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
    4428             }
    4429             idxReg = idxRegNew;
    4430         }
    4431         Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
    4432 
    4433 #ifdef VBOX_STRICT
    4434         /* Strict builds: Check that the value is correct. */
    4435         *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
    4436 #endif
    4437 
    4438         return idxReg;
    4439     }
    4440 
    4441     /*
    4442      * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
    4443      */
    4444     uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
    4445 
    4446     if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    4447         *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
    4448 
    4449     if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    4450         iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
    4451     Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
    4452            g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    4453 
    4454     return idxRegNew;
    4455 }
    4456 
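#if 0 /* Usage sketch only -- annotation, not part of the changeset. */
/**
 * Hedged illustration of fetching a guest register shadow for updating;
 * kIemNativeGstReg_Pc is merely a convenient example register.
 */
static uint32_t iemNativeSketchGuestRegUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ForUpdate);
    /* ... emit code modifying idxPcReg; the caller remains responsible for
       writing the dirty value back to the guest context ... */
    iemNativeRegFreeTmp(pReNative, idxPcReg);
    return off;
}
#endif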
    4457 
    4458 /**
    4459  * Allocates a temporary host general purpose register that already holds the
    4460  * given guest register value.
    4461  *
    4462  * The use case for this function is places where the shadowing state cannot be
    4463  * modified due to branching and such.  This will fail if we don't have a
    4464  * current shadow copy handy or if it's incompatible.  The only code that will
    4465  * be emitted here is value checking code in strict builds.
    4466  *
    4467  * The intended use can only be readonly!
    4468  *
    4469  * @returns The host register number, UINT8_MAX if not present.
    4470  * @param   pReNative       The native recompile state.
    4471  * @param   poff            Pointer to the instruction buffer offset.
    4472  *                          Will be updated in strict builds if a register is
    4473  *                          found.
    4474  * @param   enmGstReg       The guest register that is to be updated.
    4475  * @note    In strict builds, this may throw instruction buffer growth failures.
    4476  *          Non-strict builds will not throw anything.
    4477  * @sa iemNativeRegAllocTmpForGuestReg
    4478  */
    4479 DECL_HIDDEN_THROW(uint8_t)
    4480 iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
    4481 {
    4482     Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
    4483 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    4484     AssertMsg(   pReNative->idxCurCall == 0
    4485               || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
    4486               || enmGstReg == kIemNativeGstReg_Pc,
    4487               ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
    4488 #endif
    4489 
    4490     /*
    4491      * First check if the guest register value is already in a host register.
    4492      */
    4493     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    4494     {
    4495         uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
    4496         Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4497         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
    4498         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    4499 
    4500         if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
    4501         {
    4502             /*
    4503              * We only do readonly use here, so easy compared to the other
    4504              * variant of this code.
    4505              */
    4506             pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
    4507             pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
    4508             pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    4509             Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
    4510                    g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
    4511 
    4512 #ifdef VBOX_STRICT
    4513             /* Strict builds: Check that the value is correct. */
    4514             *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
    4515 #else
    4516             RT_NOREF(poff);
    4517 #endif
    4518             return idxReg;
    4519         }
    4520     }
    4521 
    4522     return UINT8_MAX;
    4523 }
    4524 
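#if 0 /* Usage sketch only -- annotation, not part of the changeset. */
/**
 * Hedged illustration of the fallible read-only lookup; since no shadow state
 * is modified, this is safe to use in branchy code.
 */
static uint32_t iemNativeSketchIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxReg != UINT8_MAX)
    {
        /* ... emit code reading idxReg ... */
        iemNativeRegFreeTmp(pReNative, idxReg);
    }
    else
    {
        /* ... fall back to a path that does not rely on a current shadow copy ... */
    }
    return off;
}
#endif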
    4525 
    4526 /**
    4527  * Allocates argument registers for a function call.
    4528  *
    4529  * @returns New code buffer offset on success; throws VBox status code on failure, so no
    4530  *          need to check the return value.
    4531  * @param   pReNative   The native recompile state.
    4532  * @param   off         The current code buffer offset.
    4533  * @param   cArgs       The number of arguments the function call takes.
    4534  */
    4535 DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
    4536 {
    4537     AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
    4538                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
    4539     Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
    4540     Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
    4541 
    4542     if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
    4543         cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
    4544     else if (cArgs == 0)
    4545         return off;
    4546 
    4547     /*
    4548      * Do we get lucky and all registers are free and not shadowing anything?
    4549      */
    4550     if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
    4551         for (uint32_t i = 0; i < cArgs; i++)
    4552         {
    4553             uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
    4554             pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
    4555             pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    4556             Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    4557         }
    4558     /*
    4559      * Okay, not lucky so we have to free up the registers.
    4560      */
    4561     else
    4562         for (uint32_t i = 0; i < cArgs; i++)
    4563         {
    4564             uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
    4565             if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
    4566             {
    4567                 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
    4568                 {
    4569                     case kIemNativeWhat_Var:
    4570                     {
    4571                         uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
    4572                         IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    4573                         AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
    4574                                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
    4575                         Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
    4576 
    4577                         if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
    4578                             pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
    4579                         else
    4580                         {
    4581                             off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
    4582                             Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    4583                         }
    4584                         break;
    4585                     }
    4586 
    4587                     case kIemNativeWhat_Tmp:
    4588                     case kIemNativeWhat_Arg:
    4589                     case kIemNativeWhat_rc:
    4590                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
    4591                     default:
    4592                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
    4593                 }
    4594 
    4595             }
    4596             if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
    4597             {
    4598                 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
    4599                 Assert(   (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
    4600                        == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    4601                 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    4602                 pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    4603                 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    4604             }
    4605             else
    4606                 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    4607             pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
    4608             pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    4609         }
    4610     pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
    4611     return off;
    4612 }
    4613 
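#if 0 /* Usage sketch only -- annotation, not part of the changeset. */
/**
 * Hedged illustration of claiming the call argument registers ahead of a
 * helper call; the argument count is arbitrary.
 */
static uint32_t iemNativeSketchArgAlloc(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeRegAllocArgs(pReNative, off, 2 /*cArgs*/);   /* may spill variables to the stack */
    /* ... load g_aidxIemNativeCallRegs[0] and [1] with the arguments and emit the call ... */
    return off;
}
#endif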
    4614 
    4615 DECL_HIDDEN_THROW(uint8_t)  iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
    4616 
    4617 
    4618 #if 0
    4619 /**
    4620  * Frees a register assignment of any type.
    4621  *
    4622  * @param   pReNative       The native recompile state.
    4623  * @param   idxHstReg       The register to free.
    4624  *
    4625  * @note    Does not update variables.
    4626  */
    4627 DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
    4628 {
    4629     Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4630     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
    4631     Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
    4632     Assert(   pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
    4633            || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
    4634            || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
    4635            || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
    4636     Assert(   pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
    4637            || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
    4638            || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
    4639     Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    4640            == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    4641     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
    4642            == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    4643 
    4644     pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxHstReg);
    4645     /* no flushing, right:
    4646     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    4647     pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4648     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4649     */
    4650 }
    4651 #endif
    4652 
    4653 
    4654 /**
    4655  * Frees a temporary register.
    4656  *
    4657  * Any shadow copies of guest registers assigned to the host register will not
    4658  * be flushed by this operation.
    4659  */
    4660 DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
    4661 {
    4662     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
    4663     Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
    4664     pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    4665     Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
    4666            g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    4667 }
    4668 
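          /*
           * Usage sketch (hypothetical caller, not verbatim recompiler code):
           * a temporary is allocated, used for one or more emits, then freed
           * so the allocator can hand the host register out again:
           *
           *     uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
           *     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp,
           *                                           RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
           *     iemNativeRegFreeTmp(pReNative, idxRegTmp);
           */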
    4669 
    4670 /**
    4671  * Frees a temporary immediate register.
    4672  *
     4673  * It is assumed that the caller has not modified the register, so it still
     4674  * holds the same value as when it was allocated via iemNativeRegAllocTmpImm().
    4675  */
    4676 DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
    4677 {
    4678     iemNativeRegFreeTmp(pReNative, idxHstReg);
    4679 }
    4680 
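          /*
           * Sketch of the intended reuse pattern (hypothetical caller; the
           * iemNativeRegAllocTmpImm() signature is assumed from its usage
           * here): the same constant may feed several emits before the
           * register is handed back:
           *
           *     uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
           *     ... emit one or more instructions reading idxRegImm ...
           *     iemNativeRegFreeTmpImm(pReNative, idxRegImm);
           */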
    4681 
    4682 /**
    4683  * Frees a register assigned to a variable.
    4684  *
    4685  * The register will be disassociated from the variable.
    4686  */
    4687 DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
    4688 {
    4689     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
    4690     Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
    4691     uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
    4692     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    4693     Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
    4694 
    4695     pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
    4696     pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    4697     if (!fFlushShadows)
    4698         Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
    4699                g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
    4700     else
    4701     {
    4702         pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    4703         uint64_t const fGstRegShadowsOld        = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4704         pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4705         pReNative->Core.bmGstRegShadows        &= ~fGstRegShadowsOld;
    4706         uint64_t       fGstRegShadows           = fGstRegShadowsOld;
    4707         while (fGstRegShadows)
    4708         {
    4709             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    4710             fGstRegShadows &= ~RT_BIT_64(idxGstReg);
    4711 
    4712             Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
    4713             pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
    4714         }
    4715         Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
    4716                g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
    4717     }
    4718 }
    4719 
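          /*
           * Sketch of the two modes (hypothetical caller): with fFlushShadows
           * false the shadow bookkeeping survives, so the register can still
           * satisfy guest register reads; with true all shadow links die too:
           *
           *     iemNativeRegFreeVar(pReNative, idxHstReg, false /*fFlushShadows*/);
           *     iemNativeRegFreeVar(pReNative, idxHstReg, true  /*fFlushShadows*/);
           */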
    4720 
    4721 /**
    4722  * Called right before emitting a call instruction to move anything important
    4723  * out of call-volatile registers, free and flush the call-volatile registers,
    4724  * optionally freeing argument variables.
    4725  *
     4726  * @returns New code buffer offset; throws VBox status code on failure.
    4727  * @param   pReNative       The native recompile state.
    4728  * @param   off             The code buffer offset.
    4729  * @param   cArgs           The number of arguments the function call takes.
     4730  *                          It is presumed that the host register part of these has
     4731  *                          already been allocated as such and won't need moving,
    4732  *                          just freeing.
    4733  * @param   fKeepVars       Mask of variables that should keep their register
    4734  *                          assignments.  Caller must take care to handle these.
    4735  */
    4736 DECL_HIDDEN_THROW(uint32_t)
    4737 iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
    4738 {
    4739     Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
    4740 
    4741     /* fKeepVars will reduce this mask. */
    4742     uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    4743 
    4744     /*
    4745      * Move anything important out of volatile registers.
    4746      */
    4747     if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
    4748         cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
    4749     uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
    4750 #ifdef IEMNATIVE_REG_FIXED_TMP0
    4751                          & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
    4752 #endif
    4753                          & ~g_afIemNativeCallRegs[cArgs];
    4754 
    4755     fRegsToMove &= pReNative->Core.bmHstRegs;
    4756     if (!fRegsToMove)
    4757     { /* likely */ }
    4758     else
    4759     {
    4760         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
    4761         while (fRegsToMove != 0)
    4762         {
    4763             unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
    4764             fRegsToMove &= ~RT_BIT_32(idxReg);
    4765 
    4766             switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
    4767             {
    4768                 case kIemNativeWhat_Var:
    4769                 {
    4770                     uint8_t const       idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
    4771                     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    4772                     PIEMNATIVEVAR const pVar   = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    4773                     Assert(pVar->idxReg == idxReg);
    4774                     if (!(RT_BIT_32(idxVar) & fKeepVars))
    4775                     {
    4776                         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
    4777                                idxVar, pVar->enmKind, pVar->idxReg));
    4778                         if (pVar->enmKind != kIemNativeVarKind_Stack)
    4779                             pVar->idxReg = UINT8_MAX;
    4780                         else
    4781                             off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
    4782                     }
    4783                     else
    4784                         fRegsToFree &= ~RT_BIT_32(idxReg);
    4785                     continue;
    4786                 }
    4787 
    4788                 case kIemNativeWhat_Arg:
    4789                     AssertMsgFailed(("What?!?: %u\n", idxReg));
    4790                     continue;
    4791 
    4792                 case kIemNativeWhat_rc:
    4793                 case kIemNativeWhat_Tmp:
    4794                     AssertMsgFailed(("Missing free: %u\n", idxReg));
    4795                     continue;
    4796 
    4797                 case kIemNativeWhat_FixedTmp:
    4798                 case kIemNativeWhat_pVCpuFixed:
    4799                 case kIemNativeWhat_pCtxFixed:
    4800                 case kIemNativeWhat_FixedReserved:
    4801                 case kIemNativeWhat_Invalid:
    4802                 case kIemNativeWhat_End:
    4803                     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
    4804             }
    4805             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
    4806         }
    4807     }
    4808 
    4809     /*
    4810      * Do the actual freeing.
    4811      */
    4812     if (pReNative->Core.bmHstRegs & fRegsToFree)
    4813         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
    4814                pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
    4815     pReNative->Core.bmHstRegs &= ~fRegsToFree;
    4816 
    4817     /* If there are guest register shadows in any call-volatile register, we
     4818        have to clear the corresponding guest register masks for each register. */
    4819     uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
    4820     if (fHstRegsWithGstShadow)
    4821     {
    4822         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
    4823                pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
    4824         pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
    4825         do
    4826         {
    4827             unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
    4828             fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    4829 
    4830             AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
    4831             pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    4832             pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    4833         } while (fHstRegsWithGstShadow != 0);
    4834     }
    4835 
    4836     return off;
    4837 }
    4838 
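          /*
           * Where this fits in a call sequence (condensed sketch of
           * iemNativeEmitCImplCall() below, with pfnHelper standing in for
           * the actual helper):
           *
           *     iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
           *     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
           *     ... load IEMNATIVE_CALL_ARG0_GREG thru IEMNATIVE_CALL_ARG3_GREG ...
           *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
           *     off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
           */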
    4839 
    4840 /**
    4841  * Flushes a set of guest register shadow copies.
    4842  *
    4843  * This is usually done after calling a threaded function or a C-implementation
    4844  * of an instruction.
    4845  *
    4846  * @param   pReNative       The native recompile state.
    4847  * @param   fGstRegs        Set of guest registers to flush.
    4848  */
    4849 DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
    4850 {
    4851     /*
    4852      * Reduce the mask by what's currently shadowed
    4853      */
    4854     uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
    4855     fGstRegs &= bmGstRegShadowsOld;
    4856     if (fGstRegs)
    4857     {
    4858         uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
    4859         Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
    4860         pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
    4861         if (bmGstRegShadowsNew)
    4862         {
    4863             /*
    4864              * Partial.
    4865              */
    4866             do
    4867             {
    4868                 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    4869                 uint8_t const  idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    4870                 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
    4871                 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
    4872                 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    4873 
    4874                 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
    4875                 fGstRegs &= ~fInThisHstReg;
    4876                 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
    4877                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
    4878                 if (!fGstRegShadowsNew)
    4879                     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    4880             } while (fGstRegs != 0);
    4881         }
    4882         else
    4883         {
    4884             /*
    4885              * Clear all.
    4886              */
    4887             do
    4888             {
    4889                 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    4890                 uint8_t const  idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    4891                 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
    4892                 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
    4893                 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    4894 
    4895                 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
    4896                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4897             } while (fGstRegs != 0);
    4898             pReNative->Core.bmHstRegsWithGstShadow = 0;
    4899         }
    4900     }
    4901 }
    4902 
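          /*
           * A typical mask for this (sketch): drop the PC and EFLAGS shadows
           * after a call that may have modified either register:
           *
           *     iemNativeRegFlushGuestShadows(pReNative,
           *                                     RT_BIT_64(kIemNativeGstReg_Pc)
           *                                   | RT_BIT_64(kIemNativeGstReg_EFlags));
           */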
    4903 
    4904 /**
    4905  * Flushes guest register shadow copies held by a set of host registers.
    4906  *
    4907  * This is used with the TLB lookup code for ensuring that we don't carry on
    4908  * with any guest shadows in volatile registers, as these will get corrupted by
    4909  * a TLB miss.
    4910  *
    4911  * @param   pReNative       The native recompile state.
    4912  * @param   fHstRegs        Set of host registers to flush guest shadows for.
    4913  */
    4914 DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
    4915 {
    4916     /*
    4917      * Reduce the mask by what's currently shadowed.
    4918      */
    4919     uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
    4920     fHstRegs &= bmHstRegsWithGstShadowOld;
    4921     if (fHstRegs)
    4922     {
    4923         uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
    4924         Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
    4925                fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
    4926         pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
    4927         if (bmHstRegsWithGstShadowNew)
    4928         {
    4929             /*
    4930              * Partial (likely).
    4931              */
    4932             uint64_t fGstShadows = 0;
    4933             do
    4934             {
    4935                 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    4936                 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
    4937                 Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    4938                        == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    4939 
    4940                 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4941                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4942                 fHstRegs &= ~RT_BIT_32(idxHstReg);
    4943             } while (fHstRegs != 0);
    4944             pReNative->Core.bmGstRegShadows &= ~fGstShadows;
    4945         }
    4946         else
    4947         {
    4948             /*
    4949              * Clear all.
    4950              */
    4951             do
    4952             {
    4953                 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    4954                 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
    4955                 Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    4956                        == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    4957 
    4958                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4959                 fHstRegs &= ~RT_BIT_32(idxHstReg);
    4960             } while (fHstRegs != 0);
    4961             pReNative->Core.bmGstRegShadows = 0;
    4962         }
    4963     }
    4964 }
    4965 
    4966 
    4967 /**
    4968  * Restores guest shadow copies in volatile registers.
    4969  *
    4970  * This is used after calling a helper function (think TLB miss) to restore the
    4971  * register state of volatile registers.
    4972  *
    4973  * @param   pReNative               The native recompile state.
    4974  * @param   off                     The code buffer offset.
    4975  * @param   fHstRegsActiveShadows   Set of host registers which are allowed to
    4976  *                                  be active (allocated) w/o asserting. Hack.
    4977  * @see     iemNativeVarSaveVolatileRegsPreHlpCall(),
    4978  *          iemNativeVarRestoreVolatileRegsPostHlpCall()
    4979  */
    4980 DECL_HIDDEN_THROW(uint32_t)
    4981 iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
    4982 {
    4983     uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    4984     if (fHstRegs)
    4985     {
    4986         Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
    4987         do
    4988         {
    4989             unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    4990 
     4991             /* It's not fatal if a register is active holding a variable that
     4992                shadows a guest register, ASSUMING all pending guest register
     4993                writes were flushed prior to the helper call. However, we'll be
     4994                emitting duplicate restores, so it wastes code space. */
    4995             Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
    4996             RT_NOREF(fHstRegsActiveShadows);
    4997 
    4998             uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4999             Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
    5000             AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
    5001                        IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
    5002 
    5003             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    5004             off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
    5005 
    5006             fHstRegs &= ~RT_BIT_32(idxHstReg);
    5007         } while (fHstRegs != 0);
    5008     }
    5009     return off;
    5010 }
    5011 
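          /*
           * Sketch of a post-helper-call sequence (hypothetical; pfnHelper is
           * a stand-in): the bookkeeping still says the volatile registers
           * shadow guest registers, but the call clobbered their contents, so
           * reload them to match:
           *
           *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
           *     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off,
           *                                                         0 /*fHstRegsActiveShadows*/);
           */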
    5012 
    5013 /**
    5014  * Flushes delayed write of a specific guest register.
    5015  *
    5016  * This must be called prior to calling CImpl functions and any helpers that use
    5017  * the guest state (like raising exceptions) and such.
    5018  *
    5019  * This optimization has not yet been implemented.  The first target would be
    5020  * RIP updates, since these are the most common ones.
    5021  */
    5022 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    5023                                                                   IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
    5024 {
    5025     RT_NOREF(pReNative, enmClass, idxReg);
    5026     return off;
    5027 }
    5028 
    5029 
    5030 /**
    5031  * Flushes any delayed guest register writes.
    5032  *
    5033  * This must be called prior to calling CImpl functions and any helpers that use
    5034  * the guest state (like raising exceptions) and such.
    5035  *
    5036  * This optimization has not yet been implemented.  The first target would be
    5037  * RIP updates, since these are the most common ones.
    5038  */
    5039 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    5040 {
    5041     RT_NOREF(pReNative, off);
    5042     return off;
    5043 }
    5044 
    5045 
    5046 #ifdef VBOX_STRICT
    5047 /**
    5048  * Does internal register allocator sanity checks.
    5049  */
    5050 static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
    5051 {
    5052     /*
    5053      * Iterate host registers building a guest shadowing set.
    5054      */
    5055     uint64_t bmGstRegShadows        = 0;
    5056     uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
    5057     AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
    5058     while (bmHstRegsWithGstShadow)
    5059     {
    5060         unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
    5061         Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    5062         bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    5063 
    5064         uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    5065         AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
    5066         AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
    5067         bmGstRegShadows |= fThisGstRegShadows;
    5068         while (fThisGstRegShadows)
    5069         {
    5070             unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
    5071             fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
    5072             AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
    5073                       ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
    5074                        idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
    5075         }
    5076     }
    5077     AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
    5078               ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
    5079                bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
    5080 
    5081     /*
    5082      * Now the other way around, checking the guest to host index array.
    5083      */
    5084     bmHstRegsWithGstShadow = 0;
    5085     bmGstRegShadows        = pReNative->Core.bmGstRegShadows;
    5086     Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    5087     while (bmGstRegShadows)
    5088     {
    5089         unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
    5090         Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
    5091         bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
    5092 
    5093         uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    5094         AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
    5095         AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
    5096                   ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
    5097                    idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    5098         bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
    5099     }
    5100     AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
     5101               ("%RX32 vs %RX32; diff %RX32\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
     5102                bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
    5103 }
    5104 #endif
    5105 
    5106 
    5107 /*********************************************************************************************************************************
    5108 *   Code Emitters (larger snippets)                                                                                              *
    5109 *********************************************************************************************************************************/
    5110 
    5111 /**
    5112  * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
    5113  * extending to 64-bit width.
    5114  *
     5115  * @returns New code buffer offset; throws VBox status code on failure.
     5116  * @param   pReNative   The native recompile state.
    5117  * @param   off         The current code buffer position.
    5118  * @param   idxHstReg   The host register to load the guest register value into.
    5119  * @param   enmGstReg   The guest register to load.
    5120  *
    5121  * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
    5122  *       that is something the caller needs to do if applicable.
    5123  */
    5124 DECL_HIDDEN_THROW(uint32_t)
    5125 iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
    5126 {
    5127     Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
    5128     Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
    5129 
    5130     switch (g_aGstShadowInfo[enmGstReg].cb)
    5131     {
    5132         case sizeof(uint64_t):
    5133             return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    5134         case sizeof(uint32_t):
    5135             return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    5136         case sizeof(uint16_t):
    5137             return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    5138 #if 0 /* not present in the table. */
    5139         case sizeof(uint8_t):
    5140             return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    5141 #endif
    5142         default:
    5143             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
    5144     }
    5145 }
    5146 
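          /*
           * Usage sketch (hypothetical): reloading the RIP shadow into a host
           * register; the g_aGstShadowInfo table supplies the CPUMCTX offset
           * and the access size picked above:
           *
           *     off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg,
           *                                                kIemNativeGstReg_Pc);
           */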
    5147 
    5148 #ifdef VBOX_STRICT
    5149 /**
    5150  * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
    5151  *
    5152  * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
    5153  *       Trashes EFLAGS on AMD64.
    5154  */
    5155 static uint32_t
    5156 iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
    5157 {
    5158 # ifdef RT_ARCH_AMD64
    5159     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
    5160 
    5161     /* rol reg64, 32 */
    5162     pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
    5163     pbCodeBuf[off++] = 0xc1;
    5164     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    5165     pbCodeBuf[off++] = 32;
    5166 
    5167     /* test reg32, ffffffffh */
    5168     if (idxReg >= 8)
    5169         pbCodeBuf[off++] = X86_OP_REX_B;
    5170     pbCodeBuf[off++] = 0xf7;
    5171     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    5172     pbCodeBuf[off++] = 0xff;
    5173     pbCodeBuf[off++] = 0xff;
    5174     pbCodeBuf[off++] = 0xff;
    5175     pbCodeBuf[off++] = 0xff;
    5176 
    5177     /* je/jz +1 */
    5178     pbCodeBuf[off++] = 0x74;
    5179     pbCodeBuf[off++] = 0x01;
    5180 
    5181     /* int3 */
    5182     pbCodeBuf[off++] = 0xcc;
    5183 
    5184     /* rol reg64, 32 */
    5185     pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
    5186     pbCodeBuf[off++] = 0xc1;
    5187     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    5188     pbCodeBuf[off++] = 32;
    5189 
    5190 # elif defined(RT_ARCH_ARM64)
    5191     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    5192     /* lsr tmp0, reg64, #32 */
    5193     pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
    5194     /* cbz tmp0, +1 */
    5195     pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    5196     /* brk #0x1100 */
    5197     pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
    5198 
    5199 # else
    5200 #  error "Port me!"
    5201 # endif
    5202     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    5203     return off;
    5204 }
    5205 #endif /* VBOX_STRICT */
    5206 
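          /*
           * C-level semantics of the check above (sketch; breakpoint() stands
           * in for int3/brk), with the register value itself left unchanged
           * (the AMD64 variant rotates it back afterwards):
           *
           *     if ((uValue >> 32) != 0)
           *         breakpoint();
           */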
    5207 
    5208 #ifdef VBOX_STRICT
    5209 /**
    5210  * Emitting code that checks that the content of register @a idxReg is the same
    5211  * as what's in the guest register @a enmGstReg, resulting in a breakpoint
    5212  * instruction if that's not the case.
    5213  *
    5214  * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
    5215  *       Trashes EFLAGS on AMD64.
    5216  */
    5217 static uint32_t
    5218 iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
    5219 {
    5220 # ifdef RT_ARCH_AMD64
    5221     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    5222 
    5223     /* cmp reg, [mem] */
    5224     if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
    5225     {
    5226         if (idxReg >= 8)
    5227             pbCodeBuf[off++] = X86_OP_REX_R;
    5228         pbCodeBuf[off++] = 0x38;
    5229     }
    5230     else
    5231     {
    5232         if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
    5233             pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
    5234         else
    5235         {
    5236             if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
    5237                 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    5238             else
    5239                 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
    5240                            IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
    5241             if (idxReg >= 8)
    5242                 pbCodeBuf[off++] = X86_OP_REX_R;
    5243         }
    5244         pbCodeBuf[off++] = 0x39;
    5245     }
    5246     off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
    5247 
    5248     /* je/jz +1 */
    5249     pbCodeBuf[off++] = 0x74;
    5250     pbCodeBuf[off++] = 0x01;
    5251 
    5252     /* int3 */
    5253     pbCodeBuf[off++] = 0xcc;
    5254 
    5255     /* For values smaller than the register size, we must check that the rest
    5256        of the register is all zeros. */
    5257     if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
    5258     {
    5259         /* test reg64, imm32 */
    5260         pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
    5261         pbCodeBuf[off++] = 0xf7;
    5262         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    5263         pbCodeBuf[off++] = 0;
    5264         pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
    5265         pbCodeBuf[off++] = 0xff;
    5266         pbCodeBuf[off++] = 0xff;
    5267 
    5268         /* je/jz +1 */
    5269         pbCodeBuf[off++] = 0x74;
    5270         pbCodeBuf[off++] = 0x01;
    5271 
    5272         /* int3 */
    5273         pbCodeBuf[off++] = 0xcc;
    5274         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    5275     }
    5276     else
    5277     {
    5278         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    5279         if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
     5280             off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
    5281     }
    5282 
    5283 # elif defined(RT_ARCH_ARM64)
    5284     /* mov TMP0, [gstreg] */
    5285     off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
    5286 
    5287     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    5288     /* sub tmp0, tmp0, idxReg */
    5289     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
    5290     /* cbz tmp0, +1 */
    5291     pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    5292     /* brk #0x1000+enmGstReg */
    5293     pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
    5294     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    5295 
    5296 # else
    5297 #  error "Port me!"
    5298 # endif
    5299     return off;
    5300 }
    5301 #endif /* VBOX_STRICT */
    5302 
    5303 
    5304 #ifdef VBOX_STRICT
    5305 /**
    5306  * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
    5307  * important bits.
    5308  *
    5309  * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
    5310  *       Trashes EFLAGS on AMD64.
    5311  */
    5312 static uint32_t
    5313 iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
    5314 {
    5315     uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
    5316     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
    5317     off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
    5318     off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
    5319 
     5320 # ifdef RT_ARCH_AMD64
    5321     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    5322 
    5323     /* je/jz +1 */
    5324     pbCodeBuf[off++] = 0x74;
    5325     pbCodeBuf[off++] = 0x01;
    5326 
    5327     /* int3 */
    5328     pbCodeBuf[off++] = 0xcc;
    5329 
    5330 # elif defined(RT_ARCH_ARM64)
    5331     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    5332 
    5333     /* b.eq +1 */
    5334     pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
    5335     /* brk #0x2000 */
    5336     pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
    5337 
    5338 # else
    5339 #  error "Port me!"
    5340 # endif
    5341     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    5342 
    5343     iemNativeRegFreeTmp(pReNative, idxRegTmp);
    5344     return off;
    5345 }
    5346 #endif /* VBOX_STRICT */
    5347 
    5348 
    5349 /**
     5350  * Emits code for checking the return code of a call and rcPassUp, returning
     5351  * from the code if either is non-zero.
    5352  */
    5353 DECL_HIDDEN_THROW(uint32_t)
    5354 iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
    5355 {
    5356 #ifdef RT_ARCH_AMD64
    5357     /*
    5358      * AMD64: eax = call status code.
    5359      */
    5360 
    5361     /* edx = rcPassUp */
    5362     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
    5363 # ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    5364     off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
    5365 # endif
    5366 
    5367     /* edx = eax | rcPassUp */
    5368     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    5369     pbCodeBuf[off++] = 0x0b;                    /* or edx, eax */
    5370     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
    5371     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    5372 
    5373     /* Jump to non-zero status return path. */
    5374     off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
    5375 
    5376     /* done. */
    5377 
     5378 #elif defined(RT_ARCH_ARM64)
    5379     /*
    5380      * ARM64: w0 = call status code.
    5381      */
    5382 # ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    5383     off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
    5384 # endif
    5385     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
    5386 
    5387     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    5388 
    5389     pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
    5390 
    5391     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
    5392     iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
    5393     pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
    5394 
    5395 #else
    5396 # error "port me"
    5397 #endif
    5398     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    5399     RT_NOREF_PV(idxInstr);
    5400     return off;
    5401 }
    5402 
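          /*
           * C-level semantics (sketch; rcCall stands in for the status code
           * register): one conditional branch covers both failure sources by
           * OR'ing them together:
           *
           *     if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)
           *         goto NonZeroRetOrPassUp;
           */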
    5403 
    5404 /**
    5405  * Emits code to check if the content of @a idxAddrReg is a canonical address,
    5406  * raising a \#GP(0) if it isn't.
    5407  *
     5408  * @returns New code buffer offset; throws VBox status code on failure.
    5409  * @param   pReNative       The native recompile state.
    5410  * @param   off             The code buffer offset.
    5411  * @param   idxAddrReg      The host register with the address to check.
    5412  * @param   idxInstr        The current instruction.
    5413  */
    5414 DECL_HIDDEN_THROW(uint32_t)
    5415 iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
    5416 {
    5417     /*
    5418      * Make sure we don't have any outstanding guest register writes as we may
     5419      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
    5420      */
    5421     off = iemNativeRegFlushPendingWrites(pReNative, off);
    5422 
    5423 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    5424     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    5425 #else
    5426     RT_NOREF(idxInstr);
    5427 #endif
    5428 
    5429 #ifdef RT_ARCH_AMD64
    5430     /*
    5431      * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
    5432      *     return raisexcpt();
     5433      * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
    5434      */
    5435     uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    5436 
    5437     off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
    5438     off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
    5439     off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
    5440     off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
    5441     off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    5442 
    5443     iemNativeRegFreeTmp(pReNative, iTmpReg);
    5444 
    5445 #elif defined(RT_ARCH_ARM64)
    5446     /*
    5447      * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
    5448      *     return raisexcpt();
    5449      * ----
    5450      *     mov     x1, 0x800000000000
    5451      *     add     x1, x0, x1
    5452      *     cmp     xzr, x1, lsr 48
    5453      *     b.ne    .Lraisexcpt
    5454      */
    5455     uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    5456 
    5457     off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
    5458     off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
    5459     off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
    5460     off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    5461 
    5462     iemNativeRegFreeTmp(pReNative, iTmpReg);
    5463 
    5464 #else
    5465 # error "Port me"
    5466 #endif
    5467     return off;
    5468 }
    5469 
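          /*
           * Worked example for the AMD64 variant: the non-canonical address
           * 0x0000800000000000 has high dword 0x00008000; adding 0x8000 gives
           * 0x00010000, and shifting right by 16 leaves 1, so the #GP(0) path
           * is taken.  The canonical 0xFFFF800000000000 has high dword
           * 0xFFFF8000; the 32-bit sum wraps to 0 and no exception is raised.
           */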
    5470 
    5471 /**
     5472  * Emits code to check that the content of @a idxAddrReg is within the limit
    5473  * of CS, raising a \#GP(0) if it isn't.
    5474  *
    5475  * @returns New code buffer offset; throws VBox status code on error.
    5476  * @param   pReNative       The native recompile state.
    5477  * @param   off             The code buffer offset.
    5478  * @param   idxAddrReg      The host register (32-bit) with the address to
    5479  *                          check.
    5480  * @param   idxInstr        The current instruction.
    5481  */
    5482 DECL_HIDDEN_THROW(uint32_t)
    5483 iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    5484                                                       uint8_t idxAddrReg, uint8_t idxInstr)
    5485 {
    5486     /*
    5487      * Make sure we don't have any outstanding guest register writes as we may
     5488      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
    5489      */
    5490     off = iemNativeRegFlushPendingWrites(pReNative, off);
    5491 
    5492 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    5493     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    5494 #else
    5495     RT_NOREF(idxInstr);
    5496 #endif
    5497 
    5498     uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
    5499                                                                 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
    5500                                                                 kIemNativeGstRegUse_ReadOnly);
    5501 
    5502     off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
    5503     off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    5504 
    5505     iemNativeRegFreeTmp(pReNative, idxRegCsLim);
    5506     return off;
    5507 }
    5508 
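          /*
           * The check reduces to one unsigned compare and branch (sketch; the
           * CPUMCTX field name is assumed):
           *
           *     if (uAddr > pVCpu->cpum.GstCtx.cs.u32Limit)
           *         goto RaiseGp0;
           */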
    5509 
    5510 /**
    5511  * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
    5512  *
    5513  * @returns The flush mask.
    5514  * @param   fCImpl          The IEM_CIMPL_F_XXX flags.
    5515  * @param   fGstShwFlush    The starting flush mask.
    5516  */
    5517 DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
    5518 {
    5519     if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
    5520         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_CS)
    5521                      |  RT_BIT_64(kIemNativeGstReg_SegBaseFirst  + X86_SREG_CS)
    5522                      |  RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
    5523     if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
    5524         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
    5525                      |  RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_SS)
    5526                      |  RT_BIT_64(kIemNativeGstReg_SegBaseFirst  + X86_SREG_SS)
    5527                      |  RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
    5528     else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
    5529         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
    5530     if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
    5531         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
    5532     return fGstShwFlush;
    5533 }
    5534 
    5535 
    5536 /**
    5537  * Emits a call to a CImpl function or something similar.
    5538  */
    5539 DECL_HIDDEN_THROW(uint32_t)
    5540 iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
    5541                        uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
    5542 {
    5543     /*
     5544      * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
    5545      * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
    5546      */
    5547     fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
    5548                                                              fGstShwFlush
    5549                                                              | RT_BIT_64(kIemNativeGstReg_Pc)
    5550                                                              | RT_BIT_64(kIemNativeGstReg_EFlags));
    5551     iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
    5552 
    5553     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
    5554 
    5555     /*
    5556      * Load the parameters.
    5557      */
    5558 #if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
     5559     /* Special-case the hidden VBOXSTRICTRC pointer. */
    5560     off = iemNativeEmitLoadGprFromGpr(  pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5561     off = iemNativeEmitLoadGprImm64(    pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
    5562     if (cAddParams > 0)
    5563         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
    5564     if (cAddParams > 1)
    5565         off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
    5566     if (cAddParams > 2)
    5567         off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
    5568     off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
    5569 
    5570 #else
    5571     AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
    5572     off = iemNativeEmitLoadGprFromGpr(  pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5573     off = iemNativeEmitLoadGprImm64(    pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
    5574     if (cAddParams > 0)
    5575         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
    5576     if (cAddParams > 1)
    5577         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
    5578     if (cAddParams > 2)
    5579 # if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
    5580         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
    5581 # else
    5582         off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
    5583 # endif
    5584 #endif
    5585 
    5586     /*
    5587      * Make the call.
    5588      */
    5589     off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
    5590 
    5591 #if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
    5592     off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
    5593 #endif
    5594 
    5595     /*
    5596      * Check the status code.
    5597      */
    5598     return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
    5599 }
    5600 
    5601 
    5602 /**
    5603  * Emits a call to a threaded worker function.
    5604  */
    5605 DECL_HIDDEN_THROW(uint32_t)
    5606 iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
    5607 {
    5608     iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
    5609     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
    5610 
    5611 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
     5612     /* The threaded function may throw / long jmp, so set the current instruction
    5613        number if we're counting. */
    5614     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    5615 #endif
    5616 
    5617     uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
    5618 
    5619 #ifdef RT_ARCH_AMD64
    5620     /* Load the parameters and emit the call. */
    5621 # ifdef RT_OS_WINDOWS
    5622 #  ifndef VBOXSTRICTRC_STRICT_ENABLED
    5623     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
    5624     if (cParams > 0)
    5625         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
    5626     if (cParams > 1)
    5627         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
    5628     if (cParams > 2)
    5629         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
    5630 #  else  /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
    5631     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
    5632     if (cParams > 0)
    5633         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
    5634     if (cParams > 1)
    5635         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
    5636     if (cParams > 2)
    5637     {
    5638         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
    5639         off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
    5640     }
    5641     off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
    5642 #  endif /* VBOXSTRICTRC_STRICT_ENABLED */
    5643 # else
    5644     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
    5645     if (cParams > 0)
    5646         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
    5647     if (cParams > 1)
    5648         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
    5649     if (cParams > 2)
    5650         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
    5651 # endif
    5652 
    5653     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
    5654 
    5655 # if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
    5656     off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
    5657 # endif
    5658 
     5659 #elif defined(RT_ARCH_ARM64)
    5660     /*
    5661      * ARM64:
    5662      */
    5663     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5664     if (cParams > 0)
    5665         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
    5666     if (cParams > 1)
    5667         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
    5668     if (cParams > 2)
    5669         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
    5670 
    5671     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
    5672 
    5673 #else
    5674 # error "port me"
    5675 #endif
    5676 
    5677     /*
    5678      * Check the status code.
    5679      */
    5680     off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
    5681 
    5682     return off;
    5683 }
    5684 
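          /*
           * What the emitted sequence amounts to (sketch; the per-function
           * argument count comes from g_acIemThreadedFunctionUsedArgs):
           *
           *     rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](
           *                    pVCpu, pCallEntry->auParams[0], auParams[1], auParams[2]);
           */
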
    5685 #ifdef VBOX_WITH_STATISTICS
    5686 /**
    5687  * Emits code to update the thread call statistics.
    5688  */
    5689 DECL_INLINE_THROW(uint32_t)
    5690 iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
    5691 {
    5692     /*
    5693      * Update threaded function stats.
    5694      */
    5695     uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
    5696     AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
    5697 # if defined(RT_ARCH_ARM64)
    5698     uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
    5699     uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
    5700     off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
    5701     iemNativeRegFreeTmp(pReNative, idxTmp1);
    5702     iemNativeRegFreeTmp(pReNative, idxTmp2);
    5703 # else
    5704     off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
    5705 # endif
    5706     return off;
    5707 }
    5708 #endif /* VBOX_WITH_STATISTICS */
    5709 
    5710 
    5711 /**
    5712  * Emits the code at the CheckBranchMiss label.
    5713  */
    5714 static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5715 {
    5716     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
    5717     if (idxLabel != UINT32_MAX)
    5718     {
    5719         iemNativeLabelDefine(pReNative, idxLabel, off);
    5720 
    5721         /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
    5722         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5723         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
    5724 
    5725         /* jump back to the return sequence. */
    5726         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5727     }
    5728     return off;
    5729 }
    5730 
    5731 
    5732 /**
    5733  * Emits the code at the NeedCsLimChecking label.
    5734  */
    5735 static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5736 {
    5737     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
    5738     if (idxLabel != UINT32_MAX)
    5739     {
    5740         iemNativeLabelDefine(pReNative, idxLabel, off);
    5741 
    5742         /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
    5743         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5744         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
    5745 
    5746         /* jump back to the return sequence. */
    5747         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5748     }
    5749     return off;
    5750 }
    5751 
    5752 
    5753 /**
    5754  * Emits the code at the ObsoleteTb label.
    5755  */
    5756 static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5757 {
    5758     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
    5759     if (idxLabel != UINT32_MAX)
    5760     {
    5761         iemNativeLabelDefine(pReNative, idxLabel, off);
    5762 
    5763         /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
    5764         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5765         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
    5766 
    5767         /* jump back to the return sequence. */
    5768         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5769     }
    5770     return off;
    5771 }
    5772 
    5773 
    5774 /**
    5775  * Emits the code at the RaiseGP0 label.
    5776  */
    5777 static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5778 {
    5779     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
    5780     if (idxLabel != UINT32_MAX)
    5781     {
    5782         iemNativeLabelDefine(pReNative, idxLabel, off);
    5783 
    5784         /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
    5785         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5786         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
    5787 
    5788         /* jump back to the return sequence. */
    5789         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5790     }
    5791     return off;
    5792 }
    5793 
    5794 
    5795 /**
    5796  * Emits the code at the RaiseNm label.
    5797  */
    5798 static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5799 {
    5800     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
    5801     if (idxLabel != UINT32_MAX)
    5802     {
    5803         iemNativeLabelDefine(pReNative, idxLabel, off);
    5804 
    5805         /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
    5806         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5807         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
    5808 
    5809         /* jump back to the return sequence. */
    5810         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5811     }
    5812     return off;
    5813 }
    5814 
    5815 
    5816 /**
    5817  * Emits the code at the RaiseUd label.
    5818  */
    5819 static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5820 {
    5821     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
    5822     if (idxLabel != UINT32_MAX)
    5823     {
    5824         iemNativeLabelDefine(pReNative, idxLabel, off);
    5825 
    5826         /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
    5827         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5828         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
    5829 
    5830         /* jump back to the return sequence. */
    5831         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5832     }
    5833     return off;
    5834 }
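
/*
 * The six tail-label emitters above share one shape: find the label, and only
 * if something referenced it, define it at the current offset, call a helper
 * with pVCpu as the sole argument, and rejoin the common return sequence.  A
 * hypothetical consolidation as a sketch; the function name and the
 * IEMNATIVELABELTYPE parameter type are assumptions:
 */
static uint32_t iemNativeEmitTailLabelSketch(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel,
                                             IEMNATIVELABELTYPE enmLabelType, uintptr_t pfnHelper)
{
    uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabelType);
    if (idxLabel != UINT32_MAX)
    {
        iemNativeLabelDefine(pReNative, idxLabel, off);

        /* int pfnHelper(PVMCPUCC pVCpu) */
        off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitCallImm(pReNative, off, pfnHelper);

        /* Jump back to the return sequence. */
        off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    }
    return off;
}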
    5835 
    5836 
    5837 /**
    5838  * Emits the code at the ReturnWithFlags label (returns
    5839  * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
    5840  */
    5841 static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5842 {
    5843     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
    5844     if (idxLabel != UINT32_MAX)
    5845     {
    5846         iemNativeLabelDefine(pReNative, idxLabel, off);
    5847 
    5848         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
    5849 
    5850         /* jump back to the return sequence. */
    5851         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5852     }
    5853     return off;
    5854 }
    5855 
    5856 
    5857 /**
    5858  * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
    5859  */
    5860 static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5861 {
    5862     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
    5863     if (idxLabel != UINT32_MAX)
    5864     {
    5865         iemNativeLabelDefine(pReNative, idxLabel, off);
    5866 
    5867         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
    5868 
    5869         /* jump back to the return sequence. */
    5870         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5871     }
    5872     return off;
    5873 }
    5874 
    5875 
    5876 /**
    5877  * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
    5878  */
    5879 static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5880 {
    5881     /*
    5882      * Generate the rc + rcPassUp fiddling code if needed.
    5883      */
    5884     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
    5885     if (idxLabel != UINT32_MAX)
    5886     {
    5887         iemNativeLabelDefine(pReNative, idxLabel, off);
    5888 
    5889         /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
    5890 #ifdef RT_ARCH_AMD64
    5891 # ifdef RT_OS_WINDOWS
    5892 #  ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    5893         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8,  X86_GREG_xCX); /* cl = instruction number */
    5894 #  endif
    5895         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
    5896         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
    5897 # else
    5898         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
    5899         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
    5900 #  ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    5901         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
    5902 #  endif
    5903 # endif
    5904 # ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    5905         off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
    5906 # endif
    5907 
    5908 #else
    5909         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
    5910         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5911         /* IEMNATIVE_CALL_ARG2_GREG is already set. */
    5912 #endif
    5913 
    5914         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
    5915         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5916     }
    5917     return off;
    5918 }
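
/*
 * For reference, the register moves above marshal the call according to the
 * host ABI: Win64 passes integer arguments in rcx/rdx/r8, SysV AMD64 in
 * rdi/rsi/rdx, and ARM64 in x0/x1/x2 (where ARG2 already holds idxInstr, as
 * noted above).  The prototype below is assumed from the comment in the
 * emitter, not taken from a header:
 */
int iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr);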
    5919 
    5920 
    5921 /**
    5922  * Emits a standard epilog.
    5923  */
    5924 static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
    5925 {
    5926     *pidxReturnLabel = UINT32_MAX;
    5927 
    5928     /*
    5929      * Successful return, so clear the return register (eax, w0).
    5930      */
    5931     off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
    5932 
    5933     /*
    5934      * Define label for common return point.
    5935      */
    5936     uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
    5937     *pidxReturnLabel = idxReturn;
    5938 
    5939     /*
    5940      * Restore registers and return.
    5941      */
    5942 #ifdef RT_ARCH_AMD64
    5943     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
    5944 
    5945     /* Reposition esp at the r15 restore point. */
    5946     pbCodeBuf[off++] = X86_OP_REX_W;
    5947     pbCodeBuf[off++] = 0x8d;                    /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
    5948     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
    5949     pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
    5950 
    5951     /* Pop non-volatile registers and return */
    5952     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r15 */
    5953     pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
    5954     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r14 */
    5955     pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
    5956     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r13 */
    5957     pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
    5958     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r12 */
    5959     pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
    5960 # ifdef RT_OS_WINDOWS
    5961     pbCodeBuf[off++] = 0x58 + X86_GREG_xDI;     /* pop rdi */
    5962     pbCodeBuf[off++] = 0x58 + X86_GREG_xSI;     /* pop rsi */
    5963 # endif
    5964     pbCodeBuf[off++] = 0x58 + X86_GREG_xBX;     /* pop rbx */
    5965     pbCodeBuf[off++] = 0xc9;                    /* leave */
    5966     pbCodeBuf[off++] = 0xc3;                    /* ret */
    5967     pbCodeBuf[off++] = 0xcc;                    /* int3 poison */
    5968 
    5969 #elif defined(RT_ARCH_ARM64)
    5970     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    5971 
    5972     /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
    5973     AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
    5974     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
    5975                                                  ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
    5976                                                  IEMNATIVE_FRAME_VAR_SIZE / 8);
    5977     /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
    5978     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5979                                                  ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
    5980     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5981                                                  ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
    5982     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5983                                                  ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
    5984     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5985                                                  ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
    5986     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5987                                                  ARMV8_A64_REG_BP,  ARMV8_A64_REG_LR,  ARMV8_A64_REG_SP, 10);
    5988     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    5989 
    5990     /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ;  */
    5991     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
    5992     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
    5993                                                      IEMNATIVE_FRAME_SAVE_REG_SIZE);
    5994 
    5995     /* retab / ret */
    5996 # ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
    5997     if (1)
    5998         pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
    5999     else
    6000 # endif
    6001         pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
    6002 
    6003 #else
    6004 # error "port me"
    6005 #endif
    6006     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    6007 
    6008     return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
    6009 }
    6010 
    6011 
    6012 /**
    6013  * Emits a standard prolog.
    6014  */
    6015 static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    6016 {
    6017 #ifdef RT_ARCH_AMD64
    6018     /*
    6019      * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
    6020      * reserving 64 bytes for stack variables plus 4 non-register argument
    6021      * slots.  Fixed register assignment: xBX = pVCpu;
    6022      *
    6023      * Since we always do the same register spilling, we can use the same
    6024      * unwind description for all the code.
    6025      */
    6026     uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    6027     pbCodeBuf[off++] = 0x50 + X86_GREG_xBP;     /* push rbp */
    6028     pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbp, rsp */
    6029     pbCodeBuf[off++] = 0x8b;
    6030     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
    6031     pbCodeBuf[off++] = 0x50 + X86_GREG_xBX;     /* push rbx */
    6032     AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
    6033 # ifdef RT_OS_WINDOWS
    6034     pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbx, rcx ; RBX = pVCpu */
    6035     pbCodeBuf[off++] = 0x8b;
    6036     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
    6037     pbCodeBuf[off++] = 0x50 + X86_GREG_xSI;     /* push rsi */
    6038     pbCodeBuf[off++] = 0x50 + X86_GREG_xDI;     /* push rdi */
    6039 # else
    6040     pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbx, rdi ; RBX = pVCpu */
    6041     pbCodeBuf[off++] = 0x8b;
    6042     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
    6043 # endif
    6044     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r12 */
    6045     pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
    6046     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r13 */
    6047     pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
    6048     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r14 */
    6049     pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
    6050     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r15 */
    6051     pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
    6052 
    6053 # ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
    6054     /* Save the frame pointer. */
    6055     off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
    6056 # endif
    6057 
    6058     off = iemNativeEmitSubGprImm(pReNative, off,    /* sub rsp, byte 28h */
    6059                                  X86_GREG_xSP,
    6060                                    IEMNATIVE_FRAME_ALIGN_SIZE
    6061                                  + IEMNATIVE_FRAME_VAR_SIZE
    6062                                  + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
    6063                                  + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
    6064     AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
    6065     AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
    6066     AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
    6067 
    6068 #elif defined(RT_ARCH_ARM64)
    6069     /*
    6070      * We set up a stack frame exactly like on x86, only we have to push the
    6071      * return address ourselves here.  We save all non-volatile registers.
    6072      */
    6073     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
    6074 
    6075 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement of libunwind for JIT FDEs. Investigate further; we have
    6076                       * been unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in
    6077                       * libunwind.  It's definitely in the dwarf stepping code, but it is very tedious to figure out
    6078                       * whether it's in any way conditional, so just emit this instruction now and hope for the best... */
    6079     /* pacibsp */
    6080     pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
    6081 # endif
    6082 
    6083     /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
    6084     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
    6085     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
    6086                                                  ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
    6087                                                  -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
    6088     /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
    6089     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    6090                                                  ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
    6091     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    6092                                                  ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
    6093     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    6094                                                  ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
    6095     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    6096                                                  ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
    6097     /* Save the BP and LR (ret address) registers at the top of the frame. */
    6098     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    6099                                                  ARMV8_A64_REG_BP,  ARMV8_A64_REG_LR,  ARMV8_A64_REG_SP, 10);
    6100     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    6101     /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
    6102     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
    6103                                                      ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
    6104 
    6105     /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ;  Allocate the variable area from SP. */
    6106     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
    6107 
    6108     /* mov r28, r0  */
    6109     off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
    6110     /* mov r27, r1  */
    6111     off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
    6112 
    6113 # ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
    6114     /* Save the frame pointer. */
    6115     off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
    6116                                            ARMV8_A64_REG_X2);
    6117 # endif
    6118 
    6119 #else
    6120 # error "port me"
    6121 #endif
    6122     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    6123     return off;
    6124 }
    6125 
    6126 
    6127 
    6128 
    6129 /*********************************************************************************************************************************
    6130 *   Emitters for IEM_MC_BEGIN and IEM_MC_END.                                                                                    *
    6131 *********************************************************************************************************************************/
    6132 
    6133 #define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
    6134     { \
    6135         Assert(pReNative->Core.bmVars     == 0); \
    6136         Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
    6137         Assert(pReNative->Core.bmStack    == 0); \
    6138         pReNative->fMc    = (a_fMcFlags); \
    6139         pReNative->fCImpl = (a_fCImplFlags); \
    6140         pReNative->cArgs  = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
    6141 
    6142 /** We have to get to the end in recompilation mode, as otherwise we won't
    6143  * generate code for all the IEM_MC_IF_XXX branches. */
    6144 #define IEM_MC_END() \
    6145         iemNativeVarFreeAll(pReNative); \
    6146     } return off
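
/*
 * Usage sketch (hypothetical MC block; the flag value and statements are made
 * up): a recompiler function body brackets its microcode statements like
 *
 *     IEM_MC_BEGIN(0, 0, IEM_MC_F_64BIT, 0);
 *         ... IEM_MC_XXX statements, each advancing 'off' ...
 *     IEM_MC_END();
 *
 * IEM_MC_BEGIN asserts a clean variable/stack state and records the MC and
 * C-impl flags plus the argument count (visible + hidden); IEM_MC_END frees
 * all variables and returns the final code buffer offset.
 */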
    6147 
    6148 
    6149 
    6150 /*********************************************************************************************************************************
    6151 *   Native Emitter Support.                                                                                                      *
    6152 *********************************************************************************************************************************/
    6153 
    6154 
    6155 #define IEM_MC_NATIVE_IF(a_fSupportedHosts)     if (RT_ARCH_VAL & (a_fSupportedHosts)) {
    6156 
    6157 #define IEM_MC_NATIVE_ELSE()                    } else {
    6158 
    6159 #define IEM_MC_NATIVE_ENDIF()                   } ((void)0)
    6160 
    6161 
    6162 #define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
    6163     off = a_fnEmitter(pReNative, off)
    6164 
    6165 #define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
    6166     off = a_fnEmitter(pReNative, off, (a0))
    6167 
    6168 #define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
    6169     off = a_fnEmitter(pReNative, off, (a0), (a1))
    6170 
    6171 #define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
    6172     off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
    6173 
    6174 #define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
    6175     off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
    6176 
    6177 #define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
    6178     off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
    6179 
    6180 #define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
    6181     off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
    6182 
    6183 #define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
    6184     off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
    6185 
    6186 #define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
    6187     off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
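
/*
 * Usage sketch with made-up emitter and variable names; RT_ARCH_VAL is a
 * compile-time constant, so the compiler drops the branch not taken:
 *
 *     IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *         IEM_MC_NATIVE_EMIT_2(iemNativeEmit_example_r_r, idxDstVar, idxSrcVar);
 *     IEM_MC_NATIVE_ELSE()
 *         ... fallback IEM_MC_XXX statements for other hosts ...
 *     IEM_MC_NATIVE_ENDIF();
 */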
    6188      33
    6189      34

    6339     184 }
    6340     185
    6341 
    6342 /*********************************************************************************************************************************
    6343 *   Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX)                                               *
    6344 *********************************************************************************************************************************/
    6345 
    6346 #define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
    6347     pReNative->fMc    = 0; \
    6348     pReNative->fCImpl = (a_fFlags); \
    6349     return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
    6350 
    6351 
    6352 #define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
    6353     pReNative->fMc    = 0; \
    6354     pReNative->fCImpl = (a_fFlags); \
    6355     return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
    6356 
    6357 DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    6358                                                     uint8_t idxInstr, uint64_t a_fGstShwFlush,
    6359                                                     uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
    6360 {
    6361     return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
    6362 }
    6363 
    6364 
    6365 #define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
    6366     pReNative->fMc    = 0; \
    6367     pReNative->fCImpl = (a_fFlags); \
    6368     return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
    6369                                    (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
    6370 
    6371 DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    6372                                                     uint8_t idxInstr, uint64_t a_fGstShwFlush,
    6373                                                     uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
    6374 {
    6375     return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
    6376 }
    6377 
    6378 
    6379 #define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
    6380     pReNative->fMc    = 0; \
    6381     pReNative->fCImpl = (a_fFlags); \
    6382     return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
    6383                                    (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
    6384 
    6385 DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    6386                                                     uint8_t idxInstr, uint64_t a_fGstShwFlush,
    6387                                                     uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
    6388                                                     uint64_t uArg2)
    6389 {
    6390     return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
    6391 }
    6392 
    6393 
    6394 
    6395 /*********************************************************************************************************************************
    6396 *   Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX)                                                     *
    6397 *********************************************************************************************************************************/
    6398 
    6399 /** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
    6400  *  and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
    6401 DECL_INLINE_THROW(uint32_t)
    6402 iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    6403 {
    6404     /*
    6405      * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
    6406      * return with a special status code and make the execution loop deal with
    6407      * this.  If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
    6408      * exception and won't continue execution.  While CPUMCTX_DBG_DBGF_MASK
    6409      * could continue w/o interruption, it will probably drop into the
    6410      * debugger, so it is not worth the effort of trying to service it here
    6411      * and we just lump it in with the handling of the others.
    6412      *
    6413      * To simplify the code and the register state management even more (wrt
    6414      * the immediate in the AND operation), we always update the flags and
    6415      * skip the extra check and its associated conditional jump.
    6416      */
    6417     AssertCompile(   (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
    6418                   <= UINT32_MAX);
    6419 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    6420     AssertMsg(   pReNative->idxCurCall == 0
    6421               || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
    6422               ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
    6423 #endif
    6424 
    6425     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    6426                                                               kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
    6427                                                               true /*fSkipLivenessAssert*/);
    6428     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
    6429                                                              X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
    6430                                                              iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
    6431     off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
    6432     off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
    6433 
    6434     /* Free but don't flush the EFLAGS register. */
    6435     iemNativeRegFreeTmp(pReNative, idxEflReg);
    6436 
    6437     return off;
    6438 }
    6439 
    6440 
    6441 /** Handles a_rcNormal: does nothing for VINF_SUCCESS, emits a jump to the ReturnBreak label for VINF_IEM_REEXEC_BREAK. */
    6442 template<int const a_rcNormal>
    6443 DECL_FORCE_INLINE(uint32_t)
    6444 iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
    6445 {
    6446     AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
    6447     if (a_rcNormal != VINF_SUCCESS)
    6448     {
    6449 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    6450         off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    6451 #else
    6452         RT_NOREF_PV(idxInstr);
    6453 #endif
    6454         return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
    6455     }
    6456     return off;
    6457 }
    6458 
    6459 
    6460 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
    6461     off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
    6462     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6463 
    6464 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
    6465     off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
    6466     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    6467     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6468 
    6469 /** Same as iemRegAddToRip64AndFinishingNoFlags. */
    6470 DECL_INLINE_THROW(uint32_t)
    6471 iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
    6472 {
    6473     /* Allocate a temporary PC register. */
    6474     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    6475 
    6476     /* Perform the addition and store the result. */
    6477     off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
    6478     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    6479 
    6480     /* Free but don't flush the PC register. */
    6481     iemNativeRegFreeTmp(pReNative, idxPcReg);
    6482 
    6483     return off;
    6484 }
    6485 
    6486 
    6487 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
    6488     off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
    6489     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6490 
    6491 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
    6492     off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
    6493     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    6494     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6495 
    6496 /** Same as iemRegAddToEip32AndFinishingNoFlags. */
    6497 DECL_INLINE_THROW(uint32_t)
    6498 iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
    6499 {
    6500     /* Allocate a temporary PC register. */
    6501     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    6502 
    6503     /* Perform the addition and store the result. */
    6504     off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
    6505     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    6506 
    6507     /* Free but don't flush the PC register. */
    6508     iemNativeRegFreeTmp(pReNative, idxPcReg);
    6509 
    6510     return off;
    6511 }
    6512 
    6513 
    6514 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
    6515     off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
    6516     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6517 
    6518 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
    6519     off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
    6520     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    6521     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6522 
    6523 /** Same as iemRegAddToIp16AndFinishingNoFlags. */
    6524 DECL_INLINE_THROW(uint32_t)
    6525 iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
    6526 {
    6527     /* Allocate a temporary PC register. */
    6528     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    6529 
    6530     /* Perform the addition and store the result. */
    6531     off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
    6532     off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    6533     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    6534 
    6535     /* Free but don't flush the PC register. */
    6536     iemNativeRegFreeTmp(pReNative, idxPcReg);
    6537 
    6538     return off;
    6539 }
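
/*
 * Guest-visible semantics of the three PC-advance emitters above, as a sketch
 * (the function name is made up; the emitters generate native code with the
 * same effect rather than calling anything like this):
 */
static void iemAdvancePcSketch(PVMCPUCC pVCpu, uint8_t cbInstr, unsigned cPcBits)
{
    uint64_t uNewPc = pVCpu->cpum.GstCtx.rip + cbInstr;
    if (cPcBits == 32)
        uNewPc = (uint32_t)uNewPc;  /* The 32-bit add clears bits 63:32. */
    else if (cPcBits == 16)
        uNewPc = (uint16_t)uNewPc;  /* The IP variant also clears bits 31:16. */
    pVCpu->cpum.GstCtx.rip = uNewPc;
}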
    6540 
    6541 
    6542 
    6543 /*********************************************************************************************************************************
    6544 *   Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX).                                *
    6545 *********************************************************************************************************************************/
    6546 
    6547 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    6548     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
    6549                                                             (a_enmEffOpSize), pCallEntry->idxInstr); \
    6550     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6551 
    6552 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    6553     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
    6554                                                             (a_enmEffOpSize), pCallEntry->idxInstr); \
    6555     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    6556     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6557 
    6558 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
    6559     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
    6560                                                             IEMMODE_16BIT, pCallEntry->idxInstr); \
    6561     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6562 
    6563 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
    6564     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
    6565                                                             IEMMODE_16BIT, pCallEntry->idxInstr); \
    6566     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    6567     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6568 
    6569 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
    6570     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
    6571                                                             IEMMODE_64BIT, pCallEntry->idxInstr); \
    6572     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6573 
    6574 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
    6575     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
    6576                                                             IEMMODE_64BIT, pCallEntry->idxInstr); \
    6577     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    6578     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6579 
    6580 /** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
    6581  *  iemRegRip64RelativeJumpS16AndFinishNoFlags and
    6582  *  iemRegRip64RelativeJumpS32AndFinishNoFlags. */
    6583 DECL_INLINE_THROW(uint32_t)
    6584 iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
    6585                                                   int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
    6586 {
    6587     Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
    6588 
    6589     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    6590     off = iemNativeRegFlushPendingWrites(pReNative, off);
    6591 
    6592     /* Allocate a temporary PC register. */
    6593     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    6594 
    6595     /* Perform the addition. */
    6596     off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
    6597 
    6598     if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
    6599     {
    6600         /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
    6601         off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
    6602     }
    6603     else
    6604     {
    6605         /* Just truncate the result to 16-bit IP. */
    6606         Assert(enmEffOpSize == IEMMODE_16BIT);
    6607         off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    6608     }
    6609     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    6610 
    6611     /* Free but don't flush the PC register. */
    6612     iemNativeRegFreeTmp(pReNative, idxPcReg);
    6613 
    6614     return off;
    6615 }
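
/*
 * A sketch of the guest-visible semantics of the 64-bit relative jump emitter
 * above (the function name is made up; X86_IS_CANONICAL is assumed available
 * from iprt/x86.h):
 */
static void iemRip64RelJumpSketch(PVMCPUCC pVCpu, uint8_t cbInstr, int32_t offDisp, IEMMODE enmEffOpSize)
{
    uint64_t uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + (int64_t)offDisp;
    if (enmEffOpSize == IEMMODE_16BIT)
        uNewRip &= UINT16_MAX;              /* 16-bit operand size truncates to IP. */
    else if (!X86_IS_CANONICAL(uNewRip))
        return;                             /* #GP(0) raised, TB exited; RIP not updated. */
    pVCpu->cpum.GstCtx.rip = uNewRip;
}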
    6616 
    6617 
    6618 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    6619     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
    6620                                                             (a_enmEffOpSize), pCallEntry->idxInstr); \
    6621     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6622 
    6623 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    6624     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
    6625                                                             (a_enmEffOpSize), pCallEntry->idxInstr); \
    6626     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    6627     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6628 
    6629 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
    6630     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
    6631                                                             IEMMODE_16BIT, pCallEntry->idxInstr); \
    6632     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6633 
    6634 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
    6635     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
    6636                                                             IEMMODE_16BIT, pCallEntry->idxInstr); \
    6637     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    6638     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6639 
    6640 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
    6641     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
    6642                                                             IEMMODE_32BIT, pCallEntry->idxInstr); \
    6643     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6644 
    6645 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
    6646     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
    6647                                                             IEMMODE_32BIT, pCallEntry->idxInstr); \
    6648     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    6649     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6650 
    6651 /** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
    6652  *  iemRegEip32RelativeJumpS16AndFinishNoFlags and
    6653  *  iemRegEip32RelativeJumpS32AndFinishNoFlags. */
    6654 DECL_INLINE_THROW(uint32_t)
    6655 iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
    6656                                                   int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
    6657 {
    6658     Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
    6659 
    6660     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    6661     off = iemNativeRegFlushPendingWrites(pReNative, off);
    6662 
    6663     /* Allocate a temporary PC register. */
    6664     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    6665 
    6666     /* Perform the addition. */
    6667     off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
    6668 
    6669     /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
    6670     if (enmEffOpSize == IEMMODE_16BIT)
    6671         off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    6672 
    6673     /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
    6674 /** @todo we can skip this in 32-bit FLAT mode. */
    6675     off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
    6676 
    6677     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    6678 
    6679     /* Free but don't flush the PC register. */
    6680     iemNativeRegFreeTmp(pReNative, idxPcReg);
    6681 
    6682     return off;
    6683 }
    6684 
    6685 
    6686 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
    6687     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
    6688     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6689 
    6690 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
    6691     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
    6692     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    6693     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6694 
    6695 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
    6696     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
    6697     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6698 
    6699 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
    6700     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
    6701     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    6702     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6703 
    6704 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
    6705     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
    6706     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6707 
    6708 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
    6709     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
    6710     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    6711     off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
    6712 
    6713 /** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
    6714 DECL_INLINE_THROW(uint32_t)
    6715 iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    6716                                                  uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
    6717 {
    6718     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    6719     off = iemNativeRegFlushPendingWrites(pReNative, off);
    6720 
    6721     /* Allocate a temporary PC register. */
    6722     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    6723 
    6724     /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
    6725     off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
    6726     off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    6727     off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
    6728     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    6729 
    6730     /* Free but don't flush the PC register. */
    6731     iemNativeRegFreeTmp(pReNative, idxPcReg);
    6732 
    6733     return off;
    6734 }
    6735 
    6736 
    6737 
    6738 /*********************************************************************************************************************************
    6739 *   Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                   *
    6740 *********************************************************************************************************************************/
    6741 
    6742 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
    6743 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
    6744     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
    6745 
    6746 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
    6747 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
    6748     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
    6749 
    6750 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
    6751 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
    6752     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP),  true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
    6753 
    6754 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
    6755  *  clears flags. */
    6756 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
    6757     IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
    6758     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    6759 
    6760 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
    6761  *  clears flags. */
    6762 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
    6763     IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
    6764     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    6765 
    6766 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
    6767  *  clears flags. */
    6768 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
    6769     IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
    6770     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    6771 
    6772 #undef IEM_MC_SET_RIP_U16_AND_FINISH
    6773 
    6774 
    6775 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
    6776 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
    6777     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
    6778 
    6779 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
    6780 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
    6781     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP),  true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
    6782 
    6783 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
    6784  *  clears flags. */
    6785 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
    6786     IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
    6787     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    6788 
    6789 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
    6790  *  and clears flags. */
    6791 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
    6792     IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
    6793     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    6794 
    6795 #undef IEM_MC_SET_RIP_U32_AND_FINISH
    6796 
    6797 
    6798 /** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
    6799 #define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
    6800     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP),  true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
    6801 
    6802 /** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
    6803  *  and clears flags. */
    6804 #define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
    6805     IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
    6806     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    6807 
    6808 #undef IEM_MC_SET_RIP_U64_AND_FINISH
    6809 
    6810 
    6811 /** Same as iemRegRipJumpU16AndFinishNoFlags,
    6812  *  iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
    6813 DECL_INLINE_THROW(uint32_t)
    6814 iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
    6815                             uint8_t idxInstr, uint8_t cbVar)
    6816 {
    6817     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
    6818     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
    6819 
    6820     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    6821     off = iemNativeRegFlushPendingWrites(pReNative, off);
    6822 
    6823     /* Get a register with the new PC loaded from idxVarPc.
    6824        Note! This ASSUMES that the high bits of the GPR are zeroed. */
    6825     uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
    6826 
    6827     /* Check limit (may #GP(0) + exit TB). */
    6828     if (!f64Bit)
    6829 /** @todo we can skip this test in FLAT 32-bit mode. */
    6830         off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
    6831     /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
    6832     else if (cbVar > sizeof(uint32_t))
    6833         off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
    6834 
    6835     /* Store the result. */
    6836     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    6837 
    6838     iemNativeVarRegisterRelease(pReNative, idxVarPc);
    6839     /** @todo implicitly free the variable? */
    6840 
    6841     return off;
    6842 }
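
/*
 * A sketch of the guest-visible semantics of the indirect-jump emitter above
 * (the function name is made up; cs.u32Limit is the CS segment limit in
 * CPUMCTX):
 */
static void iemRipJumpSketch(PVMCPUCC pVCpu, uint64_t uNewPc, bool f64Bit, uint8_t cbVar)
{
    if (!f64Bit)
    {
        /* 16/32-bit mode: the new IP/EIP must be within the CS segment limit. */
        if (uNewPc > pVCpu->cpum.GstCtx.cs.u32Limit)
            return;                         /* #GP(0) raised, TB exited. */
    }
    /* 64-bit mode: a 64-bit target must be canonical. */
    else if (cbVar > sizeof(uint32_t) && !X86_IS_CANONICAL(uNewPc))
        return;                             /* #GP(0) raised, TB exited. */
    pVCpu->cpum.GstCtx.rip = uNewPc;
}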
    6843 
    6844 
    6845 
    6846 /*********************************************************************************************************************************
    6847 *   Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX)                                                                     *
    6848 *********************************************************************************************************************************/
    6849 
    6850 #define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
    6851     off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
    6852 
    6853 /**
    6854  * Emits code to check if a \#NM exception should be raised.
    6855  *
    6856  * @returns New code buffer offset; throws VBox status code on failure.
    6857  * @param   pReNative       The native recompile state.
    6858  * @param   off             The code buffer offset.
    6859  * @param   idxInstr        The current instruction.
    6860  */
    6861 DECL_INLINE_THROW(uint32_t)
    6862 iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
    6863 {
    6864     /*
    6865      * Make sure we don't have any outstanding guest register writes as we may
    6866      * raise an #NM and all guest registers must be up to date in CPUMCTX.
    6867      *
    6868      * @todo r=aeichner Can we postpone this to the RaiseNm path?
    6869      */
    6870     off = iemNativeRegFlushPendingWrites(pReNative, off);
    6871 
    6872 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    6873     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    6874 #else
    6875     RT_NOREF(idxInstr);
    6876 #endif
    6877 
    6878     /* Allocate a temporary CR0 register. */
    6879     uint8_t const idxCr0Reg       = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
    6880     uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
    6881 
    6882     /*
    6883      * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
    6884      *     return raisexcpt();
    6885      */
    6886     /* Test and jump. */
    6887     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
    6888 
    6889     /* Free but don't flush the CR0 register. */
    6890     iemNativeRegFreeTmp(pReNative, idxCr0Reg);
    6891 
    6892     return off;
    6893 }
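
         /*
          * For reference, a sketch of the interpreted semantics this emitter mirrors
          * (assumed equivalent, not copied from IEMMc.h):
          *
          * @code
          *     if (pVCpu->cpum.GstCtx.cr0 & (X86_CR0_EM | X86_CR0_TS))
          *         return iemRaiseDeviceNotAvailable(pVCpu);
          * @endcode
          */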
    6894 
    6895 
    6896 #define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
    6897     off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
    6898 
    6899 /**
    6900  * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
    6901  *
    6902  * @returns New code buffer offset; throws VBox status code on failure.
    6903  * @param   pReNative       The native recompile state.
    6904  * @param   off             The code buffer offset.
    6905  * @param   idxInstr        The current instruction.
    6906  */
    6907 DECL_INLINE_THROW(uint32_t)
    6908 iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
    6909 {
    6910     /*
    6911      * Make sure we don't have any outstanding guest register writes as we may
    6912      * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
    6913      *
    6914      * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
    6915      */
    6916     off = iemNativeRegFlushPendingWrites(pReNative, off);
    6917 
    6918 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    6919     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    6920 #else
    6921     RT_NOREF(idxInstr);
    6922 #endif
    6923 
    6924     /* Allocate a temporary CR0 and CR4 register. */
    6925     uint8_t const idxCr0Reg       = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
    6926     uint8_t const idxCr4Reg       = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
    6927     uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
    6928     uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
    6929 
    6930     /** @todo r=aeichner Optimize this more later to have less compares and branches,
    6931      *                   (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() in IEMMc.h but check that it has some
    6932      *                   actual performance benefit first). */
    6933     /*
    6934      * if (cr0 & X86_CR0_EM)
    6935      *     return raisexcpt();
    6936      */
    6937     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM, idxLabelRaiseUd);
    6938     /*
    6939      * if (!(cr4 & X86_CR4_OSFXSR))
    6940      *     return raisexcpt();
    6941      */
    6942     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxCr4Reg, X86_CR4_OSFXSR, idxLabelRaiseUd);
    6943     /*
    6944      * if (cr0 & X86_CR0_TS)
    6945      *     return raisexcpt();
    6946      */
    6947     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_TS, idxLabelRaiseNm);
    6948 
    6949     /* Free but don't flush the CR0 and CR4 register. */
    6950     iemNativeRegFreeTmp(pReNative, idxCr0Reg);
    6951     iemNativeRegFreeTmp(pReNative, idxCr4Reg);
    6952 
    6953     return off;
    6954 }
    6955 
    6956 
    6957 
    6958 /*********************************************************************************************************************************
    6959 *   Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF)                                                         *
    6960 *********************************************************************************************************************************/
    6961 
    6962 /**
    6963  * Pushes an IEM_MC_IF_XXX onto the condition stack.
    6964  *
    6965  * @returns Pointer to the condition stack entry on success; throws VBox
    6966  *          status code when too deeply nested.
    6967  */
    6968 DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
    6969 {
    6970     uint32_t const idxStack = pReNative->cCondDepth;
    6971     AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
    6972 
    6973     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
    6974     pReNative->cCondDepth = (uint8_t)(idxStack + 1);
    6975 
    6976     uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
    6977     pEntry->fInElse       = false;
    6978     pEntry->idxLabelElse  = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
    6979     pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
    6980 
    6981     return pEntry;
    6982 }
    6983 
    6984 
    6985 /**
    6986  * Start of the if-block, snapshotting the register and variable state.
    6987  */
    6988 DECL_INLINE_THROW(void)
    6989 iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
    6990 {
    6991     Assert(offIfBlock != UINT32_MAX);
    6992     Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
    6993     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
    6994     Assert(!pEntry->fInElse);
    6995 
    6996     /* Define the start of the IF block if requested or for disassembly purposes. */
    6997     if (idxLabelIf != UINT32_MAX)
    6998         iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
    6999 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    7000     else
    7001         iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
    7002 #else
    7003     RT_NOREF(offIfBlock);
    7004 #endif
    7005 
    7006     /* Copy the initial state so we can restore it in the 'else' block. */
    7007     pEntry->InitialState = pReNative->Core;
    7008 }
    7009 
    7010 
    7011 #define IEM_MC_ELSE() } while (0); \
    7012         off = iemNativeEmitElse(pReNative, off); \
    7013         do {
    7014 
    7015 /** Emits code related to IEM_MC_ELSE. */
    7016 DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    7017 {
    7018     /* Check sanity and get the conditional stack entry. */
    7019     Assert(off != UINT32_MAX);
    7020     Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
    7021     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
    7022     Assert(!pEntry->fInElse);
    7023 
    7024     /* Jump to the endif */
    7025     off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
    7026 
    7027     /* Define the else label and enter the else part of the condition. */
    7028     iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
    7029     pEntry->fInElse = true;
    7030 
    7031     /* Snapshot the core state so we can do a merge at the endif and restore
    7032        the snapshot we took at the start of the if-block. */
    7033     pEntry->IfFinalState = pReNative->Core;
    7034     pReNative->Core = pEntry->InitialState;
    7035 
    7036     return off;
    7037 }
    7038 
    7039 
    7040 #define IEM_MC_ENDIF() } while (0); \
    7041         off = iemNativeEmitEndIf(pReNative, off)
    7042 
    7043 /** Emits code related to IEM_MC_ENDIF. */
    7044 DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    7045 {
    7046     /* Check sanity and get the conditional stack entry. */
    7047     Assert(off != UINT32_MAX);
    7048     Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
    7049     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
    7050 
    7051     /*
    7052      * Now we have to find common ground with the core state at the end of
    7053      * the if-block.  Use the smallest common denominator and just drop
    7054      * anything that isn't the same in both states.
    7055      */
    7056     /** @todo We could, maybe, shuffle registers around if we thought it helpful,
    7057      *        which is why we're doing this at the end of the else-block.
    7058      *        But we'd need more info about the future for that to be worth the effort. */
    7059     PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
    7060     if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
    7061     {
    7062         /* shadow guest stuff first. */
    7063         uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
    7064         if (fGstRegs)
    7065         {
    7066             Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
    7067             do
    7068             {
    7069                 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    7070                 fGstRegs &= ~RT_BIT_64(idxGstReg);
    7071 
    7072                 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    7073                 if (  !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
    7074                     || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
    7075                 {
    7076                     Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
    7077                            g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
    7078                     iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
    7079                 }
    7080             } while (fGstRegs);
    7081         }
    7082         else
    7083             Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
    7084 
    7085         /* Check variables next. For now we must require them to be identical
    7086            or stuff we can recreate. */
    7087         Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
    7088         uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
    7089         if (fVars)
    7090         {
    7091             uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
    7092             do
    7093             {
    7094                 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
    7095                 fVars &= ~RT_BIT_32(idxVar);
    7096 
    7097                 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
    7098                 {
    7099                     if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
    7100                         continue;
    7101                     if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
    7102                     {
    7103                         uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    7104                         if (idxHstReg != UINT8_MAX)
    7105                         {
    7106                             pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    7107                             pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    7108                             Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
    7109                                    g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
    7110                         }
    7111                         continue;
    7112                     }
    7113                 }
    7114                 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
    7115                     continue;
    7116 
    7117                 /* Irreconcilable, so drop it. */
    7118                 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    7119                 if (idxHstReg != UINT8_MAX)
    7120                 {
    7121                     pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    7122                     pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    7123                     Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
    7124                            g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
    7125                 }
    7126                 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
    7127                 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
    7128             } while (fVars);
    7129         }
    7130 
    7131         /* Finally, check that the host register allocations match. */
    7132         AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
    7133                       ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
    7134                        pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
    7135                       IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
    7136     }
    7137 
    7138     /*
    7139      * Define the endif label and maybe the else one if we're still in the 'if' part.
    7140      */
    7141     if (!pEntry->fInElse)
    7142         iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
    7143     else
    7144         Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
    7145     iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
    7146 
    7147     /* Pop the conditional stack. */
    7148     pReNative->cCondDepth -= 1;
    7149 
    7150     return off;
    7151 }
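
         /*
          * Usage sketch (illustrative, assuming the usual IEM_MC conventions): the
          * '} while (0);' fragments in IEM_MC_ELSE and IEM_MC_ENDIF close the 'do {'
          * opened by the IEM_MC_IF_XXX macros below, so a recompiled MC block such as
          *
          * @code
          *     IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
          *         ... if-body statements ...
          *     } IEM_MC_ELSE() {
          *         ... else-body statements ...
          *     } IEM_MC_ENDIF();
          * @endcode
          *
          * keeps each branch in its own scope while iemNativeEmitElse and
          * iemNativeEmitEndIf maintain the condition stack and reconcile the register
          * and variable state at the end.
          */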
    7152 
    7153 
    7154 #define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
    7155         off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
    7156         do {
    7157 
    7158 /** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
    7159 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
    7160 {
    7161     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    7162 
    7163     /* Get the eflags. */
    7164     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    7165                                                               kIemNativeGstRegUse_ReadOnly);
    7166 
    7167     /* Test and jump. */
    7168     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
    7169 
    7170     /* Free but don't flush the EFlags register. */
    7171     iemNativeRegFreeTmp(pReNative, idxEflReg);
    7172 
    7173     /* Make a copy of the core state now as we start the if-block. */
    7174     iemNativeCondStartIfBlock(pReNative, off);
    7175 
    7176     return off;
    7177 }
    7178 
    7179 
    7180 #define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
    7181         off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
    7182         do {
    7183 
    7184 /** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
    7185 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
    7186 {
    7187     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    7188 
    7189     /* Get the eflags. */
    7190     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    7191                                                               kIemNativeGstRegUse_ReadOnly);
    7192 
    7193     /* Test and jump. */
    7194     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
    7195 
    7196     /* Free but don't flush the EFlags register. */
    7197     iemNativeRegFreeTmp(pReNative, idxEflReg);
    7198 
    7199     /* Make a copy of the core state now as we start the if-block. */
    7200     iemNativeCondStartIfBlock(pReNative, off);
    7201 
    7202     return off;
    7203 }
    7204 
    7205 
    7206 #define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
    7207         off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
    7208         do {
    7209 
    7210 /** Emits code for IEM_MC_IF_EFL_BIT_SET. */
    7211 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
    7212 {
    7213     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    7214 
    7215     /* Get the eflags. */
    7216     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    7217                                                               kIemNativeGstRegUse_ReadOnly);
    7218 
    7219     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    7220     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    7221 
    7222     /* Test and jump. */
    7223     off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
    7224 
    7225     /* Free but don't flush the EFlags register. */
    7226     iemNativeRegFreeTmp(pReNative, idxEflReg);
    7227 
    7228     /* Make a copy of the core state now as we start the if-block. */
    7229     iemNativeCondStartIfBlock(pReNative, off);
    7230 
    7231     return off;
    7232 }
    7233 
    7234 
    7235 #define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
    7236         off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
    7237         do {
    7238 
    7239 /** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
    7240 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
    7241 {
    7242     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    7243 
    7244     /* Get the eflags. */
    7245     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    7246                                                               kIemNativeGstRegUse_ReadOnly);
    7247 
    7248     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    7249     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    7250 
    7251     /* Test and jump. */
    7252     off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
    7253 
    7254     /* Free but don't flush the EFlags register. */
    7255     iemNativeRegFreeTmp(pReNative, idxEflReg);
    7256 
    7257     /* Make a copy of the core state now as we start the if-block. */
    7258     iemNativeCondStartIfBlock(pReNative, off);
    7259 
    7260     return off;
    7261 }
    7262 
    7263 
    7264 #define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2)         \
    7265     off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
    7266     do {
    7267 
    7268 #define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2)         \
    7269     off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
    7270     do {
    7271 
    7272 /** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
    7273 DECL_INLINE_THROW(uint32_t)
    7274 iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    7275                                   uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
    7276 {
    7277     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    7278 
    7279     /* Get the eflags. */
    7280     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    7281                                                               kIemNativeGstRegUse_ReadOnly);
    7282 
    7283     unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
    7284     Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
    7285 
    7286     unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
    7287     Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
    7288     Assert(iBitNo1 != iBitNo2);
    7289 
    7290 #ifdef RT_ARCH_AMD64
    7291     uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
    7292 
    7293     off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    7294     if (iBitNo1 > iBitNo2)
    7295         off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
    7296     else
    7297         off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
    7298     off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    7299 
    7300 #elif defined(RT_ARCH_ARM64)
    7301     uint8_t const    idxTmpReg   = iemNativeRegAllocTmp(pReNative, &off);
    7302     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    7303 
    7304     /* and tmpreg, eflreg, #1<<iBitNo1 */
    7305     pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
    7306 
    7307     /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
    7308     if (iBitNo1 > iBitNo2)
    7309         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    7310                                                 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
    7311     else
    7312         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    7313                                                 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
    7314 
    7315     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    7316 
    7317 #else
    7318 # error "Port me"
    7319 #endif
    7320 
    7321     /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
    7322     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
    7323                                                      pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
    7324 
    7325     /* Free but don't flush the EFlags and tmp registers. */
    7326     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    7327     iemNativeRegFreeTmp(pReNative, idxEflReg);
    7328 
    7329     /* Make a copy of the core state now as we start the if-block. */
    7330     iemNativeCondStartIfBlock(pReNative, off);
    7331 
    7332     return off;
    7333 }
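
         /*
          * Worked example (illustrative only): comparing X86_EFL_SF (bit 7) against
          * X86_EFL_OF (bit 11) the way the emitter above does:
          *
          * @code
          *     uint32_t fEfl = ...;
          *     uint32_t uTmp = fEfl & RT_BIT_32(7);   // isolate SF
          *     uTmp <<= 11 - 7;                       // align it with OF
          *     uTmp ^= fEfl;                          // bit 11 is now set iff SF != OF
          *     if (uTmp & RT_BIT_32(11))
          *         // not-equal: the branch taken for BITS_NE, jumps to else for BITS_EQ
          * @endcode
          */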
    7334 
    7335 
    7336 #define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
    7337     off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
    7338     do {
    7339 
    7340 #define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
    7341     off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
    7342     do {
    7343 
    7344 /** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
    7345  *  IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
    7346 DECL_INLINE_THROW(uint32_t)
    7347 iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
    7348                                               uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
    7349 {
    7350     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    7351 
    7352     /* We need an extra if-block label for the inverted variant. */
    7353     uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
    7354                                                                  pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
    7355 
    7356     /* Get the eflags. */
    7357     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    7358                                                               kIemNativeGstRegUse_ReadOnly);
    7359 
    7360     /* Translate the flag masks to bit numbers. */
    7361     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    7362     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    7363 
    7364     unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
    7365     Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
    7366     Assert(iBitNo1 != iBitNo);
    7367 
    7368     unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
    7369     Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
    7370     Assert(iBitNo2 != iBitNo);
    7371     Assert(iBitNo2 != iBitNo1);
    7372 
    7373 #ifdef RT_ARCH_AMD64
    7374     uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
    7375 #elif defined(RT_ARCH_ARM64)
    7376     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    7377 #endif
    7378 
    7379     /* Check for the lone bit first. */
    7380     if (!fInverted)
    7381         off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
    7382     else
    7383         off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
    7384 
    7385     /* Then extract and compare the other two bits. */
    7386 #ifdef RT_ARCH_AMD64
    7387     off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    7388     if (iBitNo1 > iBitNo2)
    7389         off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
    7390     else
    7391         off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
    7392     off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    7393 
    7394 #elif defined(RT_ARCH_ARM64)
    7395     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    7396 
    7397     /* and tmpreg, eflreg, #1<<iBitNo1 */
    7398     pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
    7399 
    7400     /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
    7401     if (iBitNo1 > iBitNo2)
    7402         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    7403                                                 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
    7404     else
    7405         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    7406                                                 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
    7407 
    7408     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    7409 
    7410 #else
    7411 # error "Port me"
    7412 #endif
    7413 
    7414     /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
    7415     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
    7416                                                      pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
    7417 
    7418     /* Free but don't flush the EFlags and tmp registers. */
    7419     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    7420     iemNativeRegFreeTmp(pReNative, idxEflReg);
    7421 
    7422     /* Make a copy of the core state now as we start the if-block. */
    7423     iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
    7424 
    7425     return off;
    7426 }
    7427 
    7428 
    7429 #define IEM_MC_IF_CX_IS_NZ() \
    7430     off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
    7431     do {
    7432 
    7433 /** Emits code for IEM_MC_IF_CX_IS_NZ. */
    7434 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    7435 {
    7436     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    7437 
    7438     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    7439                                                                  kIemNativeGstRegUse_ReadOnly);
    7440     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
    7441     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    7442 
    7443     iemNativeCondStartIfBlock(pReNative, off);
    7444     return off;
    7445 }
    7446 
    7447 
    7448 #define IEM_MC_IF_ECX_IS_NZ() \
    7449     off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
    7450     do {
    7451 
    7452 #define IEM_MC_IF_RCX_IS_NZ() \
    7453     off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
    7454     do {
    7455 
    7456 /** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
    7457 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
    7458 {
    7459     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    7460 
    7461     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    7462                                                                  kIemNativeGstRegUse_ReadOnly);
    7463     off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
    7464     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    7465 
    7466     iemNativeCondStartIfBlock(pReNative, off);
    7467     return off;
    7468 }
    7469 
    7470 
    7471 #define IEM_MC_IF_CX_IS_NOT_ONE() \
    7472     off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
    7473     do {
    7474 
    7475 /** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
    7476 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    7477 {
    7478     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    7479 
    7480     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    7481                                                                  kIemNativeGstRegUse_ReadOnly);
    7482 #ifdef RT_ARCH_AMD64
    7483     off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
    7484 #else
    7485     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    7486     off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
    7487     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    7488 #endif
    7489     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    7490 
    7491     iemNativeCondStartIfBlock(pReNative, off);
    7492     return off;
    7493 }
    7494 
    7495 
    7496 #define IEM_MC_IF_ECX_IS_NOT_ONE() \
    7497     off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
    7498     do {
    7499 
    7500 #define IEM_MC_IF_RCX_IS_NOT_ONE() \
    7501     off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
    7502     do {
    7503 
    7504 /** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
    7505 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
    7506 {
    7507     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    7508 
    7509     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    7510                                                                  kIemNativeGstRegUse_ReadOnly);
    7511     if (f64Bit)
    7512         off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
    7513     else
    7514         off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
    7515     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    7516 
    7517     iemNativeCondStartIfBlock(pReNative, off);
    7518     return off;
    7519 }
    7520 
    7521 
    7522 #define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
    7523     off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
    7524     do {
    7525 
    7526 #define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
    7527     off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
    7528     do {
    7529 
    7530 /** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
    7531  *  IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
    7532 DECL_INLINE_THROW(uint32_t)
    7533 iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
    7534 {
    7535     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    7536 
    7537     /* We have to load both RCX and EFLAGS before we can start branching,
    7538        otherwise we'll end up in the else-block with an inconsistent
    7539        register allocator state.
    7540        Doing EFLAGS first as it's more likely to be loaded, right? */
    7541     uint8_t const idxEflReg    = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    7542                                                                  kIemNativeGstRegUse_ReadOnly);
    7543     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    7544                                                                  kIemNativeGstRegUse_ReadOnly);
    7545 
    7546     /** @todo we could reduce this to a single branch instruction by spending a
    7547      *        temporary register and some setnz stuff.  Not sure if loops are
    7548      *        worth it. */
    7549     /* Check CX. */
    7550 #ifdef RT_ARCH_AMD64
    7551     off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
    7552 #else
    7553     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    7554     off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
    7555     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    7556 #endif
    7557 
    7558     /* Check the EFlags bit. */
    7559     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    7560     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    7561     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
    7562                                                      !fCheckIfSet /*fJmpIfSet*/);
    7563 
    7564     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    7565     iemNativeRegFreeTmp(pReNative, idxEflReg);
    7566 
    7567     iemNativeCondStartIfBlock(pReNative, off);
    7568     return off;
    7569 }
    7570 
    7571 
    7572 #define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
    7573     off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
    7574     do {
    7575 
    7576 #define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
    7577     off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
    7578     do {
    7579 
    7580 #define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
    7581     off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
    7582     do {
    7583 
    7584 #define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
    7585     off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
    7586     do {
    7587 
    7588 /** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
    7589  *  IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
    7590  *  IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
    7591  *  IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
    7592 DECL_INLINE_THROW(uint32_t)
    7593 iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    7594                                                uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
    7595 {
    7596     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    7597 
    7598     /* We have to load both RCX and EFLAGS before we can start branching,
    7599        otherwise we'll end up in the else-block with an inconsistent
    7600        register allocator state.
    7601        Doing EFLAGS first as it's more likely to be loaded, right? */
    7602     uint8_t const idxEflReg    = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    7603                                                                  kIemNativeGstRegUse_ReadOnly);
    7604     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    7605                                                                  kIemNativeGstRegUse_ReadOnly);
    7606 
    7607     /** @todo we could reduce this to a single branch instruction by spending a
    7608      *        temporary register and some setnz stuff.  Not sure if loops are
    7609      *        worth it. */
    7610     /* Check RCX/ECX. */
    7611     if (f64Bit)
    7612         off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
    7613     else
    7614         off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
    7615 
    7616     /* Check the EFlags bit. */
    7617     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    7618     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    7619     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
    7620                                                      !fCheckIfSet /*fJmpIfSet*/);
    7621 
    7622     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    7623     iemNativeRegFreeTmp(pReNative, idxEflReg);
    7624 
    7625     iemNativeCondStartIfBlock(pReNative, off);
    7626     return off;
    7627 }
    7628 
    7629 
    7630 
    7631 /*********************************************************************************************************************************
    7632 *   Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++                                                            *
    7633 *********************************************************************************************************************************/
    7634 /** Number of hidden arguments for CIMPL calls.
    7635  * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
    7636 #if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
    7637 # define IEM_CIMPL_HIDDEN_ARGS 3
    7638 #else
    7639 # define IEM_CIMPL_HIDDEN_ARGS 2
    7640 #endif
    7641 
    7642 #define IEM_MC_NOREF(a_Name) \
    7643     RT_NOREF_PV(a_Name)
    7644 
    7645 #define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
    7646     uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
    7647 
    7648 #define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
    7649     uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
    7650 
    7651 #define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
    7652     uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
    7653 
    7654 #define IEM_MC_LOCAL(a_Type, a_Name) \
    7655     uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
    7656 
    7657 #define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
    7658     uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
    7659 
    7660 
    7661 /**
    7662  * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
    7663  */
    7664 DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
    7665 {
    7666     if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
    7667         return IEM_CIMPL_HIDDEN_ARGS;
    7668     if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
    7669         return 1;
    7670     return 0;
    7671 }
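
         /*
          * Background sketch (assumed convention, for illustration only; the worker
          * name below is hypothetical): the two standard hidden arguments of a CIMPL
          * worker are pVCpu and cbInstr, e.g.
          *
          * @code
          *     IEM_CIMPL_DEF_1(iemCImpl_SomeWorker, uint16_t, u16Value)
          *     // roughly: VBOXSTRICTRC iemCImpl_SomeWorker(PVMCPUCC pVCpu, uint8_t cbInstr, uint16_t u16Value)
          * @endcode
          *
          * On Windows/AMD64 with VBOXSTRICTRC_STRICT_ENABLED the strict status code
          * is returned via a hidden buffer pointer as well, which accounts for the
          * third hidden argument in IEM_CIMPL_HIDDEN_ARGS above.
          */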
    7672 
    7673 
    7674 /**
    7675  * Internal work that allocates a variable with kind set to
    7676  * kIemNativeVarKind_Invalid and no current stack allocation.
    7677  *
    7678  * The kind will either be set by the caller or later when the variable is first
    7679  * assigned a value.
    7680  *
    7681  * @returns Unpacked index.
    7682  * @internal
    7683  */
    7684 static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
    7685 {
    7686     Assert(cbType > 0 && cbType <= 64);
    7687     unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
    7688     AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
    7689     pReNative->Core.bmVars |= RT_BIT_32(idxVar);
    7690     pReNative->Core.aVars[idxVar].enmKind        = kIemNativeVarKind_Invalid;
    7691     pReNative->Core.aVars[idxVar].cbVar          = cbType;
    7692     pReNative->Core.aVars[idxVar].idxStackSlot   = UINT8_MAX;
    7693     pReNative->Core.aVars[idxVar].idxReg         = UINT8_MAX;
    7694     pReNative->Core.aVars[idxVar].uArgNo         = UINT8_MAX;
    7695     pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
    7696     pReNative->Core.aVars[idxVar].enmGstReg      = kIemNativeGstReg_End;
    7697     pReNative->Core.aVars[idxVar].fRegAcquired   = false;
    7698     pReNative->Core.aVars[idxVar].u.uValue       = 0;
    7699     return idxVar;
    7700 }
    7701 
    7702 
    7703 /**
    7704  * Internal work that allocates an argument variable w/o setting enmKind.
    7705  *
    7706  * @returns Unpacked index.
    7707  * @internal
    7708  */
    7709 static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
    7710 {
    7711     iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
    7712     AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
    7713     AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
    7714 
    7715     uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
    7716     pReNative->Core.aidxArgVars[iArgNo]  = idxVar; /* (unpacked) */
    7717     pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
    7718     return idxVar;
    7719 }
    7720 
    7721 
    7722 /**
    7723  * Gets the stack slot for a stack variable, allocating one if necessary.
    7724  *
    7725  * Calling this function implies that the stack slot will contain a valid
    7726  * variable value.  The caller deals with any register currently assigned to the
    7727  * variable, typically by spilling it into the stack slot.
    7728  *
    7729  * @returns The stack slot number.
    7730  * @param   pReNative   The recompiler state.
    7731  * @param   idxVar      The variable.
    7732  * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS
    7733  */
    7734 DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    7735 {
    7736     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    7737     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    7738     Assert(pVar->enmKind == kIemNativeVarKind_Stack);
    7739 
    7740     /* Already got a slot? */
    7741     uint8_t const idxStackSlot = pVar->idxStackSlot;
    7742     if (idxStackSlot != UINT8_MAX)
    7743     {
    7744         Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
    7745         return idxStackSlot;
    7746     }
    7747 
    7748     /*
    7749      * A single slot is easy to allocate.
    7750      * Allocate them from the top end, closest to BP, to reduce the displacement.
    7751      */
    7752     if (pVar->cbVar <= sizeof(uint64_t))
    7753     {
    7754         unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
    7755         AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    7756         pReNative->Core.bmStack |= RT_BIT_32(iSlot);
    7757         pVar->idxStackSlot       = (uint8_t)iSlot;
    7758         Log11(("iemNativeVarSetKindToStack: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
    7759         return (uint8_t)iSlot;
    7760     }
    7761 
    7762     /*
    7763      * We need more than one stack slot.
    7764      *
    7765      * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
    7766      */
    7767     AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
    7768     Assert(pVar->cbVar <= 64);
    7769     uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
    7770     uint32_t       fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
    7771     uint32_t       bmStack       = ~pReNative->Core.bmStack;
    7772     while (bmStack != UINT32_MAX)
    7773     {
    7774 /** @todo allocate from the top to reduce BP displacement. */
    7775         unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
    7776         AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    7777         if (!(iSlot & fBitAlignMask))
    7778         {
    7779             if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
    7780             {
    7781                 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
    7782                 pVar->idxStackSlot       = (uint8_t)iSlot;
    7783                 Log11(("iemNativeVarSetKindToStack: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
    7784                        idxVar, iSlot, fBitAllocMask, pVar->cbVar));
    7785                 return (uint8_t)iSlot;
    7786             }
    7787         }
    7788         bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
    7789     }
    7790     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    7791 }
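
         /*
          * Worked example for the multi-slot path above (illustrative): a 32 byte
          * variable gives ASMBitLastSetU32(32) = 6, so
          *
          * @code
          *     fBitAlignMask = RT_BIT_32(6 - 4) - 1         /* = 0x3 */;
          *     fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 /* = 0xf */;
          * @endcode
          *
          * i.e. the loop searches for four consecutive free 8-byte slots starting at
          * a slot index that is a multiple of four.
          */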
    7792 
    7793 
    7794 /**
    7795  * Changes the variable to a stack variable.
    7796  *
    7797  * Currently this is only possible to do the first time the variable is used;
    7798  * switching later can be implemented but hasn't been done.
    7799  *
    7800  * @param   pReNative   The recompiler state.
    7801  * @param   idxVar      The variable.
    7802  * @throws  VERR_IEM_VAR_IPE_2
    7803  */
    7804 static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    7805 {
    7806     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    7807     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    7808     if (pVar->enmKind != kIemNativeVarKind_Stack)
    7809     {
    7810         /* We could in theory transition from immediate to stack as well, but it
    7811            would involve the caller doing work storing the value on the stack. So,
    7812            till that's required we only allow transition from invalid. */
    7813         AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    7814         AssertStmt(pVar->idxReg  == UINT8_MAX,                 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    7815         pVar->enmKind = kIemNativeVarKind_Stack;
    7816 
    7817         /* Note! We don't allocate a stack slot here, that's only done when a
    7818                  slot is actually needed to hold a variable value. */
    7819     }
    7820 }
    7821 
    7822 
    7823 /**
    7824  * Sets the variable to a constant (immediate) value.
    7825  *
    7826  * This does not require stack storage as we know the value and can always
    7827  * reload it, unless of course it's referenced.
    7828  *
    7829  * @param   pReNative   The recompiler state.
    7830  * @param   idxVar      The variable.
    7831  * @param   uValue      The immediate value.
    7832  * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
    7833  */
    7834 static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
    7835 {
    7836     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    7837     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    7838     if (pVar->enmKind != kIemNativeVarKind_Immediate)
    7839     {
    7840         /* Only simple transitions for now. */
    7841         AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    7842         pVar->enmKind = kIemNativeVarKind_Immediate;
    7843     }
    7844     AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    7845 
    7846     pVar->u.uValue = uValue;
    7847     AssertMsg(   pVar->cbVar >= sizeof(uint64_t)
    7848               || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
    7849               ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
    7850 }
    7851 
    7852 
    7853 /**
    7854  * Sets the variable to a reference (pointer) to @a idxOtherVar.
    7855  *
    7856  * This does not require stack storage as we know the value and can always
    7857  * reload it.  Loading is postponed till needed.
    7858  *
    7859  * @param   pReNative   The recompiler state.
    7860  * @param   idxVar      The variable. Unpacked.
    7861  * @param   idxOtherVar The variable to take the (stack) address of. Unpacked.
    7862  *
    7863  * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
    7864  * @internal
    7865  */
    7866 static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
    7867 {
    7868     Assert(idxVar      < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
    7869     Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
    7870 
    7871     if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
    7872     {
    7873         /* Only simple transitions for now. */
    7874         AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
    7875                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    7876         pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
    7877     }
    7878     AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    7879 
    7880     pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
    7881 
    7882     /* Update the other variable, ensure it's a stack variable. */
    7883     /** @todo handle variables with const values... that'll go boom now. */
    7884     pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
    7885     iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
    7886 }
    7887 
    7888 
    7889 /**
    7890  * Sets the variable to a reference (pointer) to a guest register reference.
    7891  *
    7892  * This does not require stack storage as we know the value and can always
    7893  * reload it.  Loading is postponed till needed.
    7894  *
    7895  * @param   pReNative       The recompiler state.
    7896  * @param   idxVar          The variable.
    7897  * @param   enmRegClass     The class guest registers to reference.
    7898  * @param   idxReg          The register within @a enmRegClass to reference.
    7899  *
    7900  * @throws  VERR_IEM_VAR_IPE_2
    7901  */
    7902 static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
    7903                                            IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
    7904 {
    7905     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    7906     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    7907 
    7908     if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
    7909     {
    7910         /* Only simple transitions for now. */
    7911         AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    7912         pVar->enmKind = kIemNativeVarKind_GstRegRef;
    7913     }
    7914     AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    7915 
    7916     pVar->u.GstRegRef.enmClass = enmRegClass;
    7917     pVar->u.GstRegRef.idx      = idxReg;
    7918 }
    7919 
    7920 
    7921 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
    7922 {
    7923     return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
    7924 }
    7925 
    7926 
    7927 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
    7928 {
    7929     uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
    7930 
    7931     /* Since we're using a generic uint64_t value type, we must truncate it if
    7932        the variable is smaller, otherwise we may end up with a too large value
    7933        when scaling up an imm8 w/ sign-extension.
    7934 
    7935        This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
    7936        in the bios, bx=1) when running on arm, because clang expects 16-bit
    7937        register parameters to have bits 16 and up set to zero.  Instead of
    7938        setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the
    7939        wrong CF value in the result.  */
    7940     switch (cbType)
    7941     {
    7942         case sizeof(uint8_t):   uValue &= UINT64_C(0xff); break;
    7943         case sizeof(uint16_t):  uValue &= UINT64_C(0xffff); break;
    7944         case sizeof(uint32_t):  uValue &= UINT64_C(0xffffffff); break;
    7945     }
    7946     iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
    7947     return idxVar;
    7948 }
    7949 
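          /*
           * Illustrative sketch, not part of the original source: the masking done by
           * iemNativeArgAllocConst can be reproduced in plain C.  The helper name is
           * made up; the trailing comment works the "add bx, 0xffff" case described
           * above.
           */
          #if 0 /* illustration only */
          static uint64_t iemDemoTruncateConstForVarSize(uint64_t uValue, uint8_t cbType)
          {
              /* Same truncation switch as in the allocator above. */
              switch (cbType)
              {
                  case sizeof(uint8_t):   return uValue & UINT64_C(0xff);
                  case sizeof(uint16_t):  return uValue & UINT64_C(0xffff);
                  case sizeof(uint32_t):  return uValue & UINT64_C(0xffffffff);
                  default:                return uValue;
              }
          }
          /* (uint64_t)(int64_t)(int16_t)0xffff == UINT64_C(0xffffffffffffffff); passing
             that with cbType == sizeof(uint16_t) restores the UINT64_C(0xffff) that
             clang expects in a 16-bit register parameter. */
          #endif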
    7950 
    7951 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
    7952 {
    7953     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
    7954     idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
    7955     AssertStmt(   idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
    7956                && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
    7957                && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
    7958                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
    7959 
    7960     uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
    7961     iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
    7962     return idxArgVar;
    7963 }
    7964 
    7965 
    7966 DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
    7967 {
    7968     uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
     7969     /* Don't set to stack now, leave that to the first use, as for instance
     7970        IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
    7971     return idxVar;
    7972 }
    7973 
    7974 
    7975 DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
    7976 {
    7977     uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
    7978 
     7979     /* Since we're using a generic uint64_t value type, we must truncate it if
     7980        the variable is smaller, otherwise we may end up with too large a value
     7981        when scaling up an imm8 w/ sign-extension. */
    7982     switch (cbType)
    7983     {
    7984         case sizeof(uint8_t):   uValue &= UINT64_C(0xff); break;
    7985         case sizeof(uint16_t):  uValue &= UINT64_C(0xffff); break;
    7986         case sizeof(uint32_t):  uValue &= UINT64_C(0xffffffff); break;
    7987     }
    7988     iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
    7989     return idxVar;
    7990 }
    7991 
    7992 
    7993 /**
    7994  * Makes sure variable @a idxVar has a register assigned to it and that it stays
    7995  * fixed till we call iemNativeVarRegisterRelease.
    7996  *
    7997  * @returns The host register number.
    7998  * @param   pReNative   The recompiler state.
    7999  * @param   idxVar      The variable.
     8000  * @param   poff        Pointer to the instruction buffer offset; updated
     8001  *                      in case a register needs to be freed up or the value
     8002  *                      loaded off the stack.
     8003  * @param   fInitialized Set if the variable must already have been initialized.
     8004  *                      Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
     8005  *                      the case.
     8006  * @param   idxRegPref  Preferred register number or UINT8_MAX.
    8007  */
    8008 DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
    8009                                                        bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
    8010 {
    8011     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8012     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    8013     Assert(pVar->cbVar <= 8);
    8014     Assert(!pVar->fRegAcquired);
    8015 
    8016     uint8_t idxReg = pVar->idxReg;
    8017     if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8018     {
    8019         Assert(   pVar->enmKind > kIemNativeVarKind_Invalid
    8020                && pVar->enmKind < kIemNativeVarKind_End);
    8021         pVar->fRegAcquired = true;
    8022         return idxReg;
    8023     }
    8024 
    8025     /*
    8026      * If the kind of variable has not yet been set, default to 'stack'.
    8027      */
    8028     Assert(   pVar->enmKind >= kIemNativeVarKind_Invalid
    8029            && pVar->enmKind < kIemNativeVarKind_End);
    8030     if (pVar->enmKind == kIemNativeVarKind_Invalid)
    8031         iemNativeVarSetKindToStack(pReNative, idxVar);
    8032 
     8033     /*
     8034      * We have to allocate a register for the variable, even if it's a stack one,
     8035      * as we don't know if there are modifications being made to it before it's
     8036      * finalized (todo: analyze and insert hints about that?).
     8037      *
     8038      * If we can, we try to get the correct register for argument variables.  This
     8039      * is assuming that most argument variables are fetched as close as possible
     8040      * to the actual call, so that there aren't any interfering hidden calls
     8041      * (memory accesses, etc.) in between.
     8042      *
     8043      * If we cannot, or it's a local variable, we make sure no argument registers
     8044      * that will be used by this MC block will be allocated here, and we always
     8045      * prefer non-volatile registers to avoid needing to spill stuff for internal
     8046      * calls.
     8047      */
     8048     /** @todo Detect too early argument value fetches and have the python script
     8049      * warn about hidden calls causing less optimal code to be generated. */
    8050 
    8051     uint8_t const uArgNo = pVar->uArgNo;
    8052     if (   uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
    8053         && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
    8054     {
    8055         idxReg = g_aidxIemNativeCallRegs[uArgNo];
    8056         iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    8057         Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
    8058     }
    8059     else if (   idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
    8060              || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
    8061     {
    8062         uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
    8063         uint32_t const fRegs        = ~pReNative->Core.bmHstRegs
    8064                                     & ~pReNative->Core.bmHstRegsWithGstShadow
    8065                                     & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
    8066                                     & fNotArgsMask;
    8067         if (fRegs)
    8068         {
    8069             /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
    8070             idxReg = (uint8_t)ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    8071                                                ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    8072             Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    8073             Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    8074             Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
    8075         }
    8076         else
    8077         {
    8078             idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
    8079                                                IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
    8080             AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
    8081             Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
    8082         }
    8083     }
    8084     else
    8085     {
    8086         idxReg = idxRegPref;
    8087         iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    8088         Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
    8089     }
    8090     iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
    8091     pVar->idxReg = idxReg;
    8092 
    8093     /*
    8094      * Load it off the stack if we've got a stack slot.
    8095      */
    8096     uint8_t const idxStackSlot = pVar->idxStackSlot;
    8097     if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
    8098     {
    8099         Assert(fInitialized);
    8100         int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
    8101         switch (pVar->cbVar)
    8102         {
    8103             case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
    8104             case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
    8105             case 3: AssertFailed(); RT_FALL_THRU();
    8106             case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
    8107             default: AssertFailed(); RT_FALL_THRU();
    8108             case 8: *poff = iemNativeEmitLoadGprByBp(   pReNative, *poff, idxReg, offDispBp); break;
    8109         }
    8110     }
    8111     else
    8112     {
    8113         Assert(idxStackSlot == UINT8_MAX);
    8114         AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    8115     }
    8116     pVar->fRegAcquired = true;
    8117     return idxReg;
    8118 }
    8119 
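          /*
           * Illustrative sketch, not part of the original source: the free-register
           * pick above prefers the topmost non-volatile register.  The helper name and
           * sample masks are made up; ASMBitLastSetU32 returns the 1-based index of
           * the most significant set bit, or zero if none is set.
           */
          #if 0 /* illustration only */
          static uint8_t iemDemoPickFreeRegFromTop(uint32_t fRegs, uint32_t fVolatileMask)
          {
              /* Prefer non-volatile candidates, falling back to the full free set. */
              uint32_t const fNonVolatile = fRegs & ~fVolatileMask;
              return (uint8_t)ASMBitLastSetU32(fNonVolatile ? fNonVolatile : fRegs) - 1;
          }
          /* E.g. fRegs=0x0000f0c0 and fVolatileMask=0x00000fc0 leave 0x0000f000 as
             candidates; ASMBitLastSetU32 returns 16, so register 15 is picked from
             the top of the non-volatile block. */
          #endif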
    8120 
    8121 /**
    8122  * The value of variable @a idxVar will be written in full to the @a enmGstReg
    8123  * guest register.
    8124  *
    8125  * This function makes sure there is a register for it and sets it to be the
    8126  * current shadow copy of @a enmGstReg.
    8127  *
    8128  * @returns The host register number.
    8129  * @param   pReNative   The recompiler state.
    8130  * @param   idxVar      The variable.
    8131  * @param   enmGstReg   The guest register this variable will be written to
    8132  *                      after this call.
    8133  * @param   poff        Pointer to the instruction buffer offset.
    8134  *                      In case a register needs to be freed up or if the
    8135  *                      variable content needs to be loaded off the stack.
    8136  *
     8137  * @note    We DO NOT expect @a idxVar to be an argument variable, because we
     8138  *          can only be in the commit stage of an instruction when this
     8139  *          function is used.
    8140  */
    8141 DECL_HIDDEN_THROW(uint8_t)
    8142 iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
    8143 {
    8144     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8145     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    8146     Assert(!pVar->fRegAcquired);
    8147     AssertMsgStmt(   pVar->cbVar <= 8
    8148                   && (   pVar->enmKind == kIemNativeVarKind_Immediate
    8149                       || pVar->enmKind == kIemNativeVarKind_Stack),
    8150                   ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
    8151                    pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
    8152                   IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
    8153 
    8154     /*
    8155      * This shouldn't ever be used for arguments, unless it's in a weird else
    8156      * branch that doesn't do any calling and even then it's questionable.
    8157      *
    8158      * However, in case someone writes crazy wrong MC code and does register
    8159      * updates before making calls, just use the regular register allocator to
    8160      * ensure we get a register suitable for the intended argument number.
    8161      */
    8162     AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
    8163 
    8164     /*
    8165      * If there is already a register for the variable, we transfer/set the
    8166      * guest shadow copy assignment to it.
    8167      */
    8168     uint8_t idxReg = pVar->idxReg;
    8169     if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8170     {
    8171         if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    8172         {
    8173             uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
    8174             iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
    8175             Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
    8176                    g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
    8177         }
    8178         else
    8179         {
    8180             iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
    8181             Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
    8182                    g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
    8183         }
    8184         /** @todo figure this one out. We need some way of making sure the register isn't
    8185          * modified after this point, just in case we start writing crappy MC code. */
    8186         pVar->enmGstReg    = enmGstReg;
    8187         pVar->fRegAcquired = true;
    8188         return idxReg;
    8189     }
    8190     Assert(pVar->uArgNo == UINT8_MAX);
    8191 
     8192     /*
     8193      * Because this is supposed to be the commit stage, we just tag along with the
     8194      * temporary register allocator and upgrade the allocation to a variable register.
     8195      */
    8196     idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
    8197     Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
    8198     Assert(pReNative->Core.aHstRegs[idxReg].idxVar  == UINT8_MAX);
    8199     pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
    8200     pReNative->Core.aHstRegs[idxReg].idxVar  = idxVar;
    8201     pVar->idxReg                             = idxReg;
    8202 
    8203     /*
    8204      * Now we need to load the register value.
    8205      */
    8206     if (pVar->enmKind == kIemNativeVarKind_Immediate)
    8207         *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
    8208     else
    8209     {
    8210         uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    8211         int32_t const offDispBp    = iemNativeStackCalcBpDisp(idxStackSlot);
    8212         switch (pVar->cbVar)
    8213         {
    8214             case sizeof(uint64_t):
    8215                 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
    8216                 break;
    8217             case sizeof(uint32_t):
    8218                 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
    8219                 break;
    8220             case sizeof(uint16_t):
    8221                 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
    8222                 break;
    8223             case sizeof(uint8_t):
    8224                 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
    8225                 break;
    8226             default:
    8227                 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
    8228         }
    8229     }
    8230 
    8231     pVar->fRegAcquired = true;
    8232     return idxReg;
    8233 }
    8234 
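          /*
           * Illustrative sketch, not part of the original source: a typical commit
           * stage sequence, assuming a hypothetical local 'idxVarResult' whose value
           * is written in full to RAX (the enum arithmetic naming the guest register
           * is an assumption).  As the acquired register becomes the shadow copy of
           * the guest register, the regular shadow handling performs the actual
           * write-back.
           */
          #if 0 /* illustration only */
              uint8_t const idxReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarResult,
                                                                            (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
                                                                            &off);
              /* ... emit the code computing the final value into idxReg ... */
              iemNativeVarRegisterRelease(pReNative, idxVarResult);
          #endif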
    8235 
    8236 /**
     8237  * Sets the host register for @a idxVar to @a idxReg.
     8238  *
     8239  * The register must not be allocated.  Any guest register shadowing will be
     8240  * implicitly dropped by this call.
    8241  *
    8242  * The variable must not have any register associated with it (causes
    8243  * VERR_IEM_VAR_IPE_10 to be raised).  Conversion to a stack variable is
    8244  * implied.
    8245  *
    8246  * @returns idxReg
    8247  * @param   pReNative   The recompiler state.
    8248  * @param   idxVar      The variable.
    8249  * @param   idxReg      The host register (typically IEMNATIVE_CALL_RET_GREG).
    8250  * @param   off         For recording in debug info.
    8251  *
    8252  * @throws  VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
    8253  */
    8254 DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
    8255 {
    8256     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8257     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    8258     Assert(!pVar->fRegAcquired);
    8259     Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    8260     AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
    8261     AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
    8262 
    8263     iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
    8264     iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
    8265 
    8266     iemNativeVarSetKindToStack(pReNative, idxVar);
    8267     pVar->idxReg = idxReg;
    8268 
    8269     return idxReg;
    8270 }
    8271 
    8272 
    8273 /**
    8274  * A convenient helper function.
     8275  * A convenience wrapper around iemNativeVarRegisterSet() that also marks the register as acquired.
    8276 DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
    8277                                                              uint8_t idxReg, uint32_t *poff)
    8278 {
    8279     idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
    8280     pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
    8281     return idxReg;
    8282 }
    8283 
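          /*
           * Illustrative sketch, not part of the original source: the typical use of
           * the wrapper above is binding the call-return register to a hypothetical
           * result variable right after emitting a helper call.
           */
          #if 0 /* illustration only */
              uint8_t const idxRegRc = iemNativeVarRegisterSetAndAcquire(pReNative, idxVarRc /* hypothetical */,
                                                                         IEMNATIVE_CALL_RET_GREG, &off);
              /* ... emit code consuming idxRegRc ... */
              iemNativeVarRegisterRelease(pReNative, idxVarRc);
          #endif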
    8284 
    8285 /**
    8286  * Emit code to save volatile registers prior to a call to a helper (TLB miss).
    8287  *
    8288  * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
    8289  * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
    8290  * requirement of flushing anything in volatile host registers when making a
    8291  * call.
    8292  *
    8293  * @returns New @a off value.
    8294  * @param   pReNative           The recompiler state.
    8295  * @param   off                 The code buffer position.
    8296  * @param   fHstRegsNotToSave   Set of registers not to save & restore.
    8297  */
    8298 DECL_HIDDEN_THROW(uint32_t)
    8299 iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
    8300 {
    8301     uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
    8302     if (fHstRegs)
    8303     {
    8304         do
    8305         {
    8306             unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    8307             fHstRegs &= ~RT_BIT_32(idxHstReg);
    8308 
    8309             if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
    8310             {
    8311                 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
    8312                 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8313                 AssertStmt(   IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
    8314                            && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
    8315                            && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
    8316                            IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_12));
    8317                 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
    8318                 {
    8319                     case kIemNativeVarKind_Stack:
    8320                     {
    8321                         /* Temporarily spill the variable register. */
    8322                         uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    8323                         Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
    8324                                idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    8325                         off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
    8326                         continue;
    8327                     }
    8328 
    8329                     case kIemNativeVarKind_Immediate:
    8330                     case kIemNativeVarKind_VarRef:
    8331                     case kIemNativeVarKind_GstRegRef:
    8332                         /* It is weird to have any of these loaded at this point. */
    8333                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_13));
    8334                         continue;
    8335 
    8336                     case kIemNativeVarKind_End:
    8337                     case kIemNativeVarKind_Invalid:
    8338                         break;
    8339                 }
    8340                 AssertFailed();
    8341             }
    8342             else
    8343             {
    8344                 /*
    8345                  * Allocate a temporary stack slot and spill the register to it.
    8346                  */
    8347                 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
    8348                 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
    8349                            IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    8350                 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
    8351                 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
    8352                 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
    8353                        idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    8354                 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
    8355             }
    8356         } while (fHstRegs);
    8357     }
    8358     return off;
    8359 }
    8360 
    8361 
    8362 /**
     8363  * Emit code to restore volatile registers after a call to a helper.
    8364  *
    8365  * @returns New @a off value.
    8366  * @param   pReNative           The recompiler state.
    8367  * @param   off                 The code buffer position.
    8368  * @param   fHstRegsNotToSave   Set of registers not to save & restore.
    8369  * @see     iemNativeVarSaveVolatileRegsPreHlpCall(),
    8370  *          iemNativeRegRestoreGuestShadowsInVolatileRegs()
    8371  */
    8372 DECL_HIDDEN_THROW(uint32_t)
    8373 iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
    8374 {
    8375     uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
    8376     if (fHstRegs)
    8377     {
    8378         do
    8379         {
    8380             unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    8381             fHstRegs &= ~RT_BIT_32(idxHstReg);
    8382 
    8383             if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
    8384             {
    8385                 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
    8386                 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8387                 AssertStmt(   IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
    8388                            && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
    8389                            && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
    8390                            IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_12));
    8391                 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
    8392                 {
    8393                     case kIemNativeVarKind_Stack:
    8394                     {
    8395                         /* Unspill the variable register. */
    8396                         uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    8397                         Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
    8398                                idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    8399                         off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
    8400                         continue;
    8401                     }
    8402 
    8403                     case kIemNativeVarKind_Immediate:
    8404                     case kIemNativeVarKind_VarRef:
    8405                     case kIemNativeVarKind_GstRegRef:
    8406                         /* It is weird to have any of these loaded at this point. */
    8407                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_13));
    8408                         continue;
    8409 
    8410                     case kIemNativeVarKind_End:
    8411                     case kIemNativeVarKind_Invalid:
    8412                         break;
    8413                 }
    8414                 AssertFailed();
    8415             }
    8416             else
    8417             {
    8418                 /*
    8419                  * Restore from temporary stack slot.
    8420                  */
    8421                 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
    8422                 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
    8423                 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
    8424                 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
    8425 
    8426                 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
    8427             }
    8428         } while (fHstRegs);
    8429     }
    8430     return off;
    8431 }
    8432 
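          /*
           * Illustrative sketch, not part of the original source: the save/restore
           * pair above is meant to bracket a helper call like this, the mask naming
           * any volatile registers the caller handles itself (ARG1 here is just a
           * made-up example).
           */
          #if 0 /* illustration only */
              uint32_t const fKeep = RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG); /* hypothetical exclusion */
              off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fKeep);
              /* ... load the helper arguments and emit the actual call ... */
              off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fKeep);
          #endif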
    8433 
    8434 /**
    8435  * Worker that frees the stack slots for variable @a idxVar if any allocated.
     8436  * Worker that frees the stack slots for variable @a idxVar if any are allocated.
    8437  * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
    8438  *
    8439  * ASSUMES that @a idxVar is valid and unpacked.
    8440  */
    8441 DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    8442 {
    8443     Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
    8444     uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
    8445     if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
    8446     {
    8447         uint8_t const  cbVar      = pReNative->Core.aVars[idxVar].cbVar;
    8448         uint8_t const  cSlots     = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
    8449         uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
    8450         Assert(cSlots > 0);
    8451         Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
    8452         Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
    8453                idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
    8454         pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
    8455         pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
    8456     }
    8457     else
    8458         Assert(idxStackSlot == UINT8_MAX);
    8459 }
    8460 
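          /*
           * Illustrative arithmetic, not part of the original source: for a
           * hypothetical 16-byte variable sitting in stack slot 4 the worker above
           * computes
           *     cSlots     = (16 + 8 - 1) / 8  = 2
           *     fAllocMask = RT_BIT_32(2) - 1  = 0x3
           * and clears (0x3 << 4) = 0x30 from bmStack, freeing both slots in one go.
           */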
    8461 
    8462 /**
    8463  * Worker that frees a single variable.
    8464  *
    8465  * ASSUMES that @a idxVar is valid and unpacked.
    8466  */
    8467 DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    8468 {
    8469     Assert(   pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid  /* Including invalid as we may have unused */
    8470            && pReNative->Core.aVars[idxVar].enmKind <  kIemNativeVarKind_End);    /* variables in conditional branches. */
    8471     Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
    8472 
    8473     /* Free the host register first if any assigned. */
    8474     uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    8475     if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8476     {
    8477         Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
    8478         pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
    8479         pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    8480     }
    8481 
    8482     /* Free argument mapping. */
    8483     uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
    8484     if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
    8485         pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
    8486 
    8487     /* Free the stack slots. */
    8488     iemNativeVarFreeStackSlots(pReNative, idxVar);
    8489 
    8490     /* Free the actual variable. */
    8491     pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
    8492     pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
    8493 }
    8494 
    8495 
    8496 /**
    8497  * Worker for iemNativeVarFreeAll that's called when there is anything to do.
    8498  */
    8499 DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
    8500 {
    8501     while (bmVars != 0)
    8502     {
    8503         uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
    8504         bmVars &= ~RT_BIT_32(idxVar);
    8505 
    8506 #if 1 /** @todo optimize by simplifying this later... */
    8507         iemNativeVarFreeOneWorker(pReNative, idxVar);
    8508 #else
    8509         /* Only need to free the host register, the rest is done as bulk updates below. */
    8510         uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    8511         if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8512         {
    8513             Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
    8514             pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
    8515             pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    8516         }
    8517 #endif
    8518     }
    8519 #if 0 /** @todo optimize by simplifying this later... */
    8520     pReNative->Core.bmVars     = 0;
    8521     pReNative->Core.bmStack    = 0;
    8522     pReNative->Core.u64ArgVars = UINT64_MAX;
    8523 #endif
    8524 }
    8525 
    8526 
    8527 /**
    8528  * This is called by IEM_MC_END() to clean up all variables.
    8529  */
    8530 DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
    8531 {
    8532     uint32_t const bmVars = pReNative->Core.bmVars;
    8533     if (bmVars != 0)
    8534         iemNativeVarFreeAllSlow(pReNative, bmVars);
    8535     Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
    8536     Assert(pReNative->Core.bmStack    == 0);
    8537 }
    8538 
    8539 
    8540 #define IEM_MC_FREE_LOCAL(a_Name)   iemNativeVarFreeLocal(pReNative, a_Name)
    8541 
    8542 /**
    8543  * This is called by IEM_MC_FREE_LOCAL.
    8544  */
    8545 DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    8546 {
    8547     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8548     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
    8549     iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
    8550 }
    8551 
    8552 
    8553 #define IEM_MC_FREE_ARG(a_Name)     iemNativeVarFreeArg(pReNative, a_Name)
    8554 
    8555 /**
    8556  * This is called by IEM_MC_FREE_ARG.
    8557  */
    8558 DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    8559 {
    8560     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8561     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
    8562     iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
    8563 }
    8564 
    8565 
    8566 #define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
    8567 
    8568 /**
    8569  * This is called by IEM_MC_ASSIGN_TO_SMALLER.
    8570  */
    8571 DECL_INLINE_THROW(uint32_t)
    8572 iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
    8573 {
    8574     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
    8575     PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
    8576     AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    8577     Assert(   pVarDst->cbVar == sizeof(uint16_t)
    8578            || pVarDst->cbVar == sizeof(uint32_t));
    8579 
    8580     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
    8581     PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
    8582     AssertStmt(   pVarSrc->enmKind == kIemNativeVarKind_Stack
    8583                || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
    8584                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    8585 
    8586     Assert(pVarDst->cbVar < pVarSrc->cbVar);
    8587 
    8588     /*
    8589      * Special case for immediates.
    8590      */
    8591     if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
    8592     {
    8593         switch (pVarDst->cbVar)
    8594         {
    8595             case sizeof(uint16_t):
    8596                 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
    8597                 break;
    8598             case sizeof(uint32_t):
    8599                 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
    8600                 break;
    8601             default: AssertFailed(); break;
    8602         }
    8603     }
    8604     else
    8605     {
    8606         /*
    8607          * The generic solution for now.
    8608          */
    8609         /** @todo optimize this by having the python script make sure the source
    8610          *        variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
    8611          *        statement.   Then we could just transfer the register assignments. */
    8612         uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
    8613         uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
    8614         switch (pVarDst->cbVar)
    8615         {
    8616             case sizeof(uint16_t):
    8617                 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
    8618                 break;
    8619             case sizeof(uint32_t):
    8620                 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
    8621                 break;
    8622             default: AssertFailed(); break;
    8623         }
    8624         iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    8625         iemNativeVarRegisterRelease(pReNative, idxVarDst);
    8626     }
    8627     return off;
    8628 }
    8629 
    8630 
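          /*
           * Illustrative note, not part of the original source: for an immediate
           * source the helper above emits no code at all.  Assigning a hypothetical
           * 32-bit source holding 0x12345678 to a 16-bit destination merely turns
           * the destination into the constant 0x5678; only stack-kind sources take
           * the register-to-register copy path.
           */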
    8631 
    8632 /*********************************************************************************************************************************
    8633 *   Emitters for IEM_MC_CALL_CIMPL_XXX                                                                                           *
    8634 *********************************************************************************************************************************/
    8635 
    8636 /**
    8637  * Emits code to load a reference to the given guest register into @a idxGprDst.
     8638  */
    8639 DECL_INLINE_THROW(uint32_t)
    8640 iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
    8641                                IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
    8642 {
    8643     /*
    8644      * Get the offset relative to the CPUMCTX structure.
    8645      */
    8646     uint32_t offCpumCtx;
    8647     switch (enmClass)
    8648     {
    8649         case kIemNativeGstRegRef_Gpr:
    8650             Assert(idxRegInClass < 16);
    8651             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
    8652             break;
    8653 
     8654         case kIemNativeGstRegRef_GprHighByte:    /* AH, CH, DH, BH */
    8655             Assert(idxRegInClass < 4);
    8656             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
    8657             break;
    8658 
    8659         case kIemNativeGstRegRef_EFlags:
    8660             Assert(idxRegInClass == 0);
    8661             offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
    8662             break;
    8663 
    8664         case kIemNativeGstRegRef_MxCsr:
    8665             Assert(idxRegInClass == 0);
    8666             offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
    8667             break;
    8668 
    8669         case kIemNativeGstRegRef_FpuReg:
    8670             Assert(idxRegInClass < 8);
    8671             AssertFailed(); /** @todo what kind of indexing? */
    8672             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
    8673             break;
    8674 
    8675         case kIemNativeGstRegRef_MReg:
    8676             Assert(idxRegInClass < 8);
    8677             AssertFailed(); /** @todo what kind of indexing? */
    8678             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
    8679             break;
    8680 
    8681         case kIemNativeGstRegRef_XReg:
    8682             Assert(idxRegInClass < 16);
    8683             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
    8684             break;
    8685 
    8686         default:
    8687             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
    8688     }
    8689 
     8690     /*
     8691      * Load the address into the destination register.
     8692      */
    8693 #ifdef RT_ARCH_AMD64
    8694     off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
    8695 
    8696 #elif defined(RT_ARCH_ARM64)
    8697     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    8698     Assert(offCpumCtx < 4096);
    8699     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
    8700 
    8701 #else
    8702 # error "Port me!"
    8703 #endif
    8704 
    8705     return off;
    8706 }
    8707 
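          /*
           * Illustrative sketch, not part of the original source: materializing a
           * pointer to the guest AH register (high-byte class, index 0) so it can be
           * passed as a by-reference argument; idxGprDst stands for a hypothetical
           * free host register.
           */
          #if 0 /* illustration only */
              off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxGprDst,
                                                   kIemNativeGstRegRef_GprHighByte, 0 /* AH */);
          #endif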
    8708 
    8709 /**
    8710  * Common code for CIMPL and AIMPL calls.
    8711  *
    8712  * These are calls that uses argument variables and such.  They should not be
    8713  * confused with internal calls required to implement an MC operation,
    8714  * like a TLB load and similar.
    8715  *
    8716  * Upon return all that is left to do is to load any hidden arguments and
    8717  * perform the call. All argument variables are freed.
    8718  *
    8719  * @returns New code buffer offset; throws VBox status code on error.
    8720  * @param   pReNative       The native recompile state.
    8721  * @param   off             The code buffer offset.
     8722  * @param   cArgs           The total number of arguments (includes hidden
    8723  *                          count).
    8724  * @param   cHiddenArgs     The number of hidden arguments.  The hidden
    8725  *                          arguments must not have any variable declared for
    8726  *                          them, whereas all the regular arguments must
    8727  *                          (tstIEMCheckMc ensures this).
    8728  */
    8729 DECL_HIDDEN_THROW(uint32_t)
    8730 iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
    8731 {
    8732 #ifdef VBOX_STRICT
    8733     /*
    8734      * Assert sanity.
    8735      */
    8736     Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
    8737     Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
    8738     for (unsigned i = 0; i < cHiddenArgs; i++)
    8739         Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
    8740     for (unsigned i = cHiddenArgs; i < cArgs; i++)
    8741     {
    8742         Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
    8743         Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
    8744     }
    8745     iemNativeRegAssertSanity(pReNative);
    8746 #endif
    8747 
    8748     /*
    8749      * Before we do anything else, go over variables that are referenced and
    8750      * make sure they are not in a register.
    8751      */
    8752     uint32_t bmVars = pReNative->Core.bmVars;
    8753     if (bmVars)
    8754     {
    8755         do
    8756         {
    8757             uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
    8758             bmVars &= ~RT_BIT_32(idxVar);
    8759 
    8760             if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
    8761             {
    8762                 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
    8763                 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8764                 {
    8765                     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
    8766                     Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
    8767                            idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
    8768                            idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    8769                     off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
    8770 
    8771                     pReNative->Core.aVars[idxVar].idxReg    = UINT8_MAX;
    8772                     pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxRegOld);
    8773                     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
    8774                     pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
    8775                     pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
    8776                 }
    8777             }
    8778         } while (bmVars != 0);
    8779 #if 0 //def VBOX_STRICT
    8780         iemNativeRegAssertSanity(pReNative);
    8781 #endif
    8782     }
    8783 
    8784     uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
    8785 
    8786     /*
    8787      * First, go over the host registers that will be used for arguments and make
    8788      * sure they either hold the desired argument or are free.
    8789      */
    8790     if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
    8791     {
    8792         for (uint32_t i = 0; i < cRegArgs; i++)
    8793         {
    8794             uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
    8795             if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
    8796             {
    8797                 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
    8798                 {
    8799                     uint8_t const       idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
    8800                     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8801                     PIEMNATIVEVAR const pVar   = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    8802                     Assert(pVar->idxReg == idxArgReg);
    8803                     uint8_t const       uArgNo = pVar->uArgNo;
    8804                     if (uArgNo == i)
     8805                     { /* perfect */ }
    8806                     /* The variable allocator logic should make sure this is impossible,
    8807                        except for when the return register is used as a parameter (ARM,
    8808                        but not x86). */
    8809 #if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
    8810                     else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
    8811                     {
    8812 # ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    8813 #  error "Implement this"
    8814 # endif
    8815                         Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
    8816                         uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
    8817                         AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
    8818                                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
    8819                         off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
    8820                     }
    8821 #endif
    8822                     else
    8823                     {
    8824                         AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
    8825 
    8826                         if (pVar->enmKind == kIemNativeVarKind_Stack)
    8827                             off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
    8828                         else
    8829                         {
    8830                             /* just free it, can be reloaded if used again */
    8831                             pVar->idxReg               = UINT8_MAX;
    8832                             pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
    8833                             iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
    8834                         }
    8835                     }
    8836                 }
    8837                 else
    8838                     AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
    8839                                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
    8840             }
    8841         }
    8842 #if 0 //def VBOX_STRICT
    8843         iemNativeRegAssertSanity(pReNative);
    8844 #endif
    8845     }
    8846 
    8847     Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
    8848 
    8849 #ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    8850     /*
    8851      * If there are any stack arguments, make sure they are in their place as well.
    8852      *
     8853      * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
     8854      * the caller) will be loading it later and it must be free (see first loop).
    8855      */
    8856     if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
    8857     {
    8858         for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
    8859         {
    8860             PIEMNATIVEVAR const pVar      = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
    8861             int32_t const       offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
    8862             if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8863             {
    8864                 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
    8865                 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
    8866                 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
    8867                 pVar->idxReg = UINT8_MAX;
    8868             }
    8869             else
    8870             {
    8871                 /* Use ARG0 as temp for stuff we need registers for. */
    8872                 switch (pVar->enmKind)
    8873                 {
    8874                     case kIemNativeVarKind_Stack:
    8875                     {
    8876                         uint8_t const idxStackSlot = pVar->idxStackSlot;
    8877                         AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    8878                         off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
    8879                                                        iemNativeStackCalcBpDisp(idxStackSlot));
    8880                         off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
    8881                         continue;
    8882                     }
    8883 
    8884                     case kIemNativeVarKind_Immediate:
    8885                         off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
    8886                         continue;
    8887 
    8888                     case kIemNativeVarKind_VarRef:
    8889                     {
    8890                         uint8_t const idxOtherVar    = pVar->u.idxRefVar; /* unpacked */
    8891                         Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
    8892                         uint8_t const idxStackSlot   = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
    8893                         int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
    8894                         uint8_t const idxRegOther    = pReNative->Core.aVars[idxOtherVar].idxReg;
    8895                         if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8896                         {
    8897                             off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
    8898                             iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
    8899                             Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
    8900                         }
    8901                         Assert(   pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
    8902                                && pReNative->Core.aVars[idxOtherVar].idxReg       == UINT8_MAX);
    8903                         off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
    8904                         off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
    8905                         continue;
    8906                     }
    8907 
    8908                     case kIemNativeVarKind_GstRegRef:
    8909                         off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
    8910                                                              pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
    8911                         off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
    8912                         continue;
    8913 
    8914                     case kIemNativeVarKind_Invalid:
    8915                     case kIemNativeVarKind_End:
    8916                         break;
    8917                 }
    8918                 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
    8919             }
    8920         }
    8921 # if 0 //def VBOX_STRICT
    8922         iemNativeRegAssertSanity(pReNative);
    8923 # endif
    8924     }
    8925 #else
    8926     AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
    8927 #endif
    8928 
    8929     /*
    8930      * Make sure the argument variables are loaded into their respective registers.
    8931      *
    8932      * We can optimize this by ASSUMING that any register allocations are for
     8933      * registers that have already been loaded and are ready.  The previous step
    8934      * saw to that.
    8935      */
    8936     if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
    8937     {
    8938         for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
    8939         {
    8940             uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
    8941             if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
    8942                 Assert(   pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
    8943                        && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
    8944                        && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
    8945             else
    8946             {
    8947                 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
    8948                 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8949                 {
    8950                     Assert(pVar->enmKind == kIemNativeVarKind_Stack);
    8951                     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
    8952                     pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
    8953                                               | RT_BIT_32(idxArgReg);
    8954                     pVar->idxReg = idxArgReg;
    8955                 }
    8956                 else
    8957                 {
    8958                     /* Use ARG0 as temp for stuff we need registers for. */
    8959                     switch (pVar->enmKind)
    8960                     {
    8961                         case kIemNativeVarKind_Stack:
    8962                         {
    8963                             uint8_t const idxStackSlot = pVar->idxStackSlot;
    8964                             AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    8965                             off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
    8966                             continue;
    8967                         }
    8968 
    8969                         case kIemNativeVarKind_Immediate:
    8970                             off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
    8971                             continue;
    8972 
    8973                         case kIemNativeVarKind_VarRef:
    8974                         {
    8975                             uint8_t const idxOtherVar    = pVar->u.idxRefVar; /* unpacked */
    8976                             Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
    8977                             uint8_t const idxStackSlot   = iemNativeVarGetStackSlot(pReNative,
    8978                                                                                     IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
    8979                             int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
    8980                             uint8_t const idxRegOther    = pReNative->Core.aVars[idxOtherVar].idxReg;
    8981                             if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8982                             {
    8983                                 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
    8984                                 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
    8985                                 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
    8986                             }
    8987                             Assert(   pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
    8988                                    && pReNative->Core.aVars[idxOtherVar].idxReg       == UINT8_MAX);
    8989                             off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
    8990                             continue;
    8991                         }
    8992 
    8993                         case kIemNativeVarKind_GstRegRef:
    8994                             off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
    8995                                                                  pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
    8996                             continue;
    8997 
    8998                         case kIemNativeVarKind_Invalid:
    8999                         case kIemNativeVarKind_End:
    9000                             break;
    9001                     }
    9002                     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
    9003                 }
    9004             }
    9005         }
    9006 #if 0 //def VBOX_STRICT
    9007         iemNativeRegAssertSanity(pReNative);
    9008 #endif
    9009     }
    9010 #ifdef VBOX_STRICT
    9011     else
    9012         for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
    9013         {
    9014             Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
    9015             Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
    9016         }
    9017 #endif
    9018 
    9019     /*
    9020      * Free all argument variables (simplified).
    9021      * Their lifetime always expires with the call they are for.
    9022      */
    9023     /** @todo Make the python script check that arguments aren't used after
    9024      *        IEM_MC_CALL_XXXX. */
    9025     /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends
    9026      *        requiring an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call,
    9027      *        typically with an argument value.  There is also some FPU stuff. */
    9028     for (uint32_t i = cHiddenArgs; i < cArgs; i++)
    9029     {
    9030         uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
    9031         Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
    9032 
    9033         /* no need to free registers: */
    9034         AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
    9035                   ?    pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
    9036                     || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
    9037                   : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
    9038                   ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
    9039                    i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
    9040 
    9041         pReNative->Core.aidxArgVars[i] = UINT8_MAX;
    9042         pReNative->Core.bmVars        &= ~RT_BIT_32(idxVar);
    9043         iemNativeVarFreeStackSlots(pReNative, idxVar);
    9044     }
    9045     Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
    9046 
    9047     /*
    9048      * Flush volatile registers as we make the call.
    9049      */
    9050     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
    9051 
    9052     return off;
    9053 }
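
/* Illustrative sketch (editor's note, not part of the original source): for a
 * call where arg0 is an immediate, arg1 a stack variable and arg2 a variable
 * reference, the kind switch above would emit roughly this on AMD64 with the
 * SysV calling convention (Windows would use rcx/rdx/r8 instead):
 *      mov rdi, <imm64>           ; kIemNativeVarKind_Immediate
 *      mov rsi, [rbp - <slot1>]   ; kIemNativeVarKind_Stack
 *      lea rdx, [rbp - <slot2>]   ; kIemNativeVarKind_VarRef (value spilled first if live)
 * The <slotN> displacements stand in for whatever iemNativeStackCalcBpDisp
 * returns for the variables' stack slots. */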
    9054 
    9055 
    9056 /** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
    9057 DECL_HIDDEN_THROW(uint32_t)
    9058 iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
    9059                              uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
    9061 {
    9062     /*
    9063      * Do all the call setup and cleanup.
    9064      */
    9065     off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
    9066 
    9067     /*
    9068      * Load the two or three hidden arguments.
    9069      */
    9070 #if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
    9071     off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
    9072     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    9073     off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
    9074 #else
    9075     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    9076     off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
    9077 #endif
    9078 
    9079     /*
    9080      * Make the call and check the return code.
    9081      *
    9082      * Shadow PC copies are always flushed here; other flushing depends on the flags.
    9083      * Segment and general purpose registers are explicitly flushed via the
    9084      * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
    9085      * macros.
    9086      */
    9087     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
    9088 #if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
    9089     off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
    9090 #endif
    9091     fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
    9092     if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls.  */
    9093         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
    9094     iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
    9095 
    9096     return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
    9097 }
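
/* Editor's sketch of the typical instruction sequence this emits on AMD64
 * without VBOXSTRICTRC_STRICT_ENABLED, assuming pVCpu is kept in rbx
 * (IEMNATIVE_REG_FIXED_PVMCPU on this target):
 *      ...argument setup from iemNativeEmitCallCommon...
 *      mov rdi, rbx               ; hidden arg0 = pVCpu
 *      mov sil, <cbInstr>         ; hidden arg1 = instruction length
 *      call <pfnCImpl>
 *      ...return code check from iemNativeEmitCheckCallRetAndPassUp... */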
    9098 
    9099 
    9100 #define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
    9101     off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
    9102 
    9103 /** Emits code for IEM_MC_CALL_CIMPL_1. */
    9104 DECL_INLINE_THROW(uint32_t)
    9105 iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    9106                         uintptr_t pfnCImpl, uint8_t idxArg0)
    9107 {
    9108     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    9109     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
    9110 }
    9111 
    9112 
    9113 #define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
    9114     off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
    9115 
    9116 /** Emits code for IEM_MC_CALL_CIMPL_2. */
    9117 DECL_INLINE_THROW(uint32_t)
    9118 iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    9119                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
    9120 {
    9121     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    9122     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    9123     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
    9124 }
    9125 
    9126 
    9127 #define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
    9128     off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
    9129                                   (uintptr_t)a_pfnCImpl, a0, a1, a2)
    9130 
    9131 /** Emits code for IEM_MC_CALL_CIMPL_3. */
    9132 DECL_INLINE_THROW(uint32_t)
    9133 iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    9134                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
    9135 {
    9136     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    9137     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    9138     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
    9139     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
    9140 }
    9141 
    9142 
    9143 #define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
    9144     off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
    9145                                   (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
    9146 
    9147 /** Emits code for IEM_MC_CALL_CIMPL_4. */
    9148 DECL_INLINE_THROW(uint32_t)
    9149 iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    9150                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
    9151 {
    9152     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    9153     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    9154     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
    9155     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
    9156     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
    9157 }
    9158 
    9159 
    9160 #define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
    9161     off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
    9162                                   (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
    9163 
    9164 /** Emits code for IEM_MC_CALL_CIMPL_5. */
    9165 DECL_INLINE_THROW(uint32_t)
    9166 iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    9167                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
    9168 {
    9169     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    9170     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    9171     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
    9172     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
    9173     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
    9174     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
    9175 }
    9176 
    9177 
    9178 /** Recompiler debugging: Flush guest register shadow copies. */
    9179 #define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
    9180 
    9181 
    9182 
    9183 /*********************************************************************************************************************************
    9184 *   Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX                                                            *
    9185 *********************************************************************************************************************************/
    9186 
    9187 /**
    9188  * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
    9189  */
    9190 DECL_INLINE_THROW(uint32_t)
    9191 iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    9192                              uintptr_t pfnAImpl, uint8_t cArgs)
    9193 {
    9194     if (idxVarRc != UINT8_MAX)
    9195     {
    9196         IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
    9197         PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
    9198         AssertStmt(pVarRc->uArgNo == UINT8_MAX,       IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
    9199         AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
    9200     }
    9201 
    9202     /*
    9203      * Do all the call setup and cleanup.
    9204      */
    9205     off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
    9206 
    9207     /*
    9208      * Make the call and update the return code variable if we've got one.
    9209      */
    9210     off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
    9211     if (idxVarRc != UINT8_MAX)
    9212     {
    9213         off = iemNativeEmitBrk(pReNative, off, 0x4222); /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
    9214         iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
    9215     }
    9216 
    9217     return off;
    9218 }
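
/* Editor's note: an assembly/AIMPL helper is an ordinary C-callable function
 * taking up to four integer/pointer arguments, hypothetically along the
 * lines of:
 *      IEM_DECL_IMPL_DEF(uint32_t, iemAImpl_example,(uint64_t *puDst, uint64_t uSrc));
 * The worker above marshals the IEM_MC arguments into the host calling
 * convention, makes the call, and, when a return variable was supplied, binds
 * that variable to IEMNATIVE_CALL_RET_GREG (rax / x0). */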
    9219 
    9220 
    9221 
    9222 #define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
    9223     off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
    9224 
    9225 #define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
    9226     off = iemNativeEmitCallAImpl0(pReNative, off, a_rc,                   (uintptr_t)(a_pfn))
    9227 
    9228 /** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
    9229 DECL_INLINE_THROW(uint32_t)
    9230 iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
    9231 {
    9232     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
    9233 }
    9234 
    9235 
    9236 #define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
    9237     off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
    9238 
    9239 #define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
    9240     off = iemNativeEmitCallAImpl1(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0)
    9241 
    9242 /** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
    9243 DECL_INLINE_THROW(uint32_t)
    9244 iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
    9245 {
    9246     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    9247     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
    9248 }
    9249 
    9250 
    9251 #define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
    9252     off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
    9253 
    9254 #define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
    9255     off = iemNativeEmitCallAImpl2(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0, a1)
    9256 
    9257 /** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
    9258 DECL_INLINE_THROW(uint32_t)
    9259 iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    9260                         uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
    9261 {
    9262     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    9263     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
    9264     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
    9265 }
    9266 
    9267 
    9268 #define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
    9269     off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
    9270 
    9271 #define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
    9272     off = iemNativeEmitCallAImpl3(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0, a1, a2)
    9273 
    9274 /** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
    9275 DECL_INLINE_THROW(uint32_t)
    9276 iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    9277                         uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
    9278 {
    9279     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    9280     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
    9281     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
    9282     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
    9283 }
    9284 
    9285 
    9286 #define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
    9287     off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
    9288 
    9289 #define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
    9290     off = iemNativeEmitCallAImpl4(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0, a1, a2, a3)
    9291 
    9292 /** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
    9293 DECL_INLINE_THROW(uint32_t)
    9294 iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    9295                         uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
    9296 {
    9297     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    9298     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
    9299     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
    9300     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
    9301     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
    9302 }
    9303 
    9304 
    9305 
    9306 /*********************************************************************************************************************************
    9307 *   Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX).                                                       *
    9308 *********************************************************************************************************************************/
    9309 
    9310 #define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
    9311     off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst,  a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
    9312 
    9313 #define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
    9314     off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
    9315 
    9316 #define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
    9317     off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
    9318 
    9319 #define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
    9320     off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
    9321 
    9322 
    9323 /** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
    9324  *  IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
    9325 DECL_INLINE_THROW(uint32_t)
    9326 iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbZeroExtended)
    9327 {
    9328     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    9329     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
    9330     Assert(iGRegEx < 20);
    9331 
    9332     /* Same discussion as in iemNativeEmitFetchGregU16 */
    9333     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    9334                                                                   kIemNativeGstRegUse_ReadOnly);
    9335 
    9336     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    9337     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    9338 
    9339     /* The value is zero-extended to the full 64-bit host register width. */
    9340     if (iGRegEx < 16)
    9341         off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    9342     else
    9343         off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
    9344 
    9345     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    9346     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    9347     return off;
    9348 }
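
/* Editor's note: iGRegEx values 0..15 select the ordinary low byte registers,
 * while 16..19 select AH, CH, DH and BH; the `& 15` masking above maps those
 * back onto RAX..RBX and the Gpr8Hi variant extracts bits 15:8.  E.g.
 * iGRegEx == 17 fetches CH out of the RCX shadow register. */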
    9349 
    9350 
    9351 #define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
    9352     off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
    9353 
    9354 #define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
    9355     off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
    9356 
    9357 #define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
    9358     off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
    9359 
    9360 /** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
    9361 DECL_INLINE_THROW(uint32_t)
    9362 iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
    9363 {
    9364     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    9365     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
    9366     Assert(iGRegEx < 20);
    9367 
    9368     /* Same discussion as in iemNativeEmitFetchGregU16 */
    9369     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    9370                                                                   kIemNativeGstRegUse_ReadOnly);
    9371 
    9372     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    9373     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    9374 
    9375     if (iGRegEx < 16)
    9376     {
    9377         switch (cbSignExtended)
    9378         {
    9379             case sizeof(uint16_t):
    9380                 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    9381                 break;
    9382             case sizeof(uint32_t):
    9383                 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    9384                 break;
    9385             case sizeof(uint64_t):
    9386                 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    9387                 break;
    9388             default: AssertFailed(); break;
    9389         }
    9390     }
    9391     else
    9392     {
    9393         off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
    9394         switch (cbSignExtended)
    9395         {
    9396             case sizeof(uint16_t):
    9397                 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
    9398                 break;
    9399             case sizeof(uint32_t):
    9400                 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
    9401                 break;
    9402             case sizeof(uint64_t):
    9403                 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
    9404                 break;
    9405             default: AssertFailed(); break;
    9406         }
    9407     }
    9408 
    9409     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    9410     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    9411     return off;
    9412 }
    9413 
    9414 
    9415 
    9416 #define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
    9417     off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
    9418 
    9419 #define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
    9420     off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
    9421 
    9422 #define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
    9423     off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
    9424 
    9425 /** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
    9426 DECL_INLINE_THROW(uint32_t)
    9427 iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
    9428 {
    9429     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    9430     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
    9431     Assert(iGReg < 16);
    9432 
    9433     /*
    9434      * We can either just load the low 16 bits of the GPR into a host register
    9435      * for the variable, or we can do so via a shadow copy host register. The
    9436      * latter will avoid having to reload it if it's being stored later, but
    9437      * will waste a host register if it isn't touched again.  Since we don't
    9438      * know what's going to happen, we choose the latter for now.
    9439      */
    9440     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9441                                                                   kIemNativeGstRegUse_ReadOnly);
    9442 
    9443     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    9444     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    9445     off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
    9446     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    9447 
    9448     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    9449     return off;
    9450 }
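
/* Editor's note: on AMD64 the 16-bit load above is presumably a plain
 * `movzx r32, r16` from the guest shadow register, so the variable's host
 * register comes out zero-extended to the full 64 bits as a side effect. */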
    9451 
    9452 
    9453 #define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
    9454     off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
    9455 
    9456 #define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
    9457     off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
    9458 
    9459 /** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
    9460 DECL_INLINE_THROW(uint32_t)
    9461 iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
    9462 {
    9463     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    9464     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
    9465     Assert(iGReg < 16);
    9466 
    9467     /*
    9468      * We can either just load the low 16 bits of the GPR into a host register
    9469      * for the variable, or we can do so via a shadow copy host register. The
    9470      * latter will avoid having to reload it if it's being stored later, but
    9471      * will waste a host register if it isn't touched again.  Since we don't
    9472      * know what's going to happen, we choose the latter for now.
    9473      */
    9474     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9475                                                                   kIemNativeGstRegUse_ReadOnly);
    9476 
    9477     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    9478     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    9479     if (cbSignExtended == sizeof(uint32_t))
    9480         off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
    9481     else
    9482     {
    9483         Assert(cbSignExtended == sizeof(uint64_t));
    9484         off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
    9485     }
    9486     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    9487 
    9488     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    9489     return off;
    9490 }
    9491 
    9492 
    9493 #define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
    9494     off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
    9495 
    9496 #define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
    9497     off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
    9498 
    9499 /** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
    9500 DECL_INLINE_THROW(uint32_t)
    9501 iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
    9502 {
    9503     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    9504     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
    9505     Assert(iGReg < 16);
    9506 
    9507     /*
    9508      * We can either just load the low 32 bits of the GPR into a host register
    9509      * for the variable, or we can do so via a shadow copy host register. The
    9510      * latter will avoid having to reload it if it's being stored later, but
    9511      * will waste a host register if it isn't touched again.  Since we don't
    9512      * know what's going to happen, we choose the latter for now.
    9513      */
    9514     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9515                                                                   kIemNativeGstRegUse_ReadOnly);
    9516 
    9517     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    9518     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    9519     off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
    9520     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    9521 
    9522     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    9523     return off;
    9524 }
    9525 
    9526 
    9527 #define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
    9528     off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
    9529 
    9530 /** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
    9531 DECL_INLINE_THROW(uint32_t)
    9532 iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
    9533 {
    9534     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    9535     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
    9536     Assert(iGReg < 16);
    9537 
    9538     /*
    9539      * We can either just load the low 32 bits of the GPR into a host register
    9540      * for the variable, or we can do so via a shadow copy host register. The
    9541      * latter will avoid having to reload it if it's being stored later, but
    9542      * will waste a host register if it isn't touched again.  Since we don't
    9543      * know what's going to happen, we choose the latter for now.
    9544      */
    9545     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9546                                                                   kIemNativeGstRegUse_ReadOnly);
    9547 
    9548     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    9549     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    9550     off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
    9551     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    9552 
    9553     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    9554     return off;
    9555 }
    9556 
    9557 
    9558 #define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
    9559     off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
    9560 
    9561 #define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
    9562     off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
    9563 
    9564 /** Emits code for IEM_MC_FETCH_GREG_U64 (and the
    9565  *  IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
    9566 DECL_INLINE_THROW(uint32_t)
    9567 iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
    9568 {
    9569     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    9570     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
    9571     Assert(iGReg < 16);
    9572 
    9573     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9574                                                                   kIemNativeGstRegUse_ReadOnly);
    9575 
    9576     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    9577     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    9578     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
    9579     /** @todo name the register a shadow one already? */
    9580     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    9581 
    9582     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    9583     return off;
    9584 }
    9585 
    9586 
    9587 
    9588 /*********************************************************************************************************************************
    9589 *   Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX).                                                        *
    9590 *********************************************************************************************************************************/
    9591 
    9592 #define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
    9593     off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
    9594 
    9595 /** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
    9596 DECL_INLINE_THROW(uint32_t)
    9597 iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
    9598 {
    9599     Assert(iGRegEx < 20);
    9600     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    9601                                                                  kIemNativeGstRegUse_ForUpdate);
    9602 #ifdef RT_ARCH_AMD64
    9603     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
    9604 
    9605     /* To the lowest byte of the register: mov r8, imm8 */
    9606     if (iGRegEx < 16)
    9607     {
    9608         if (idxGstTmpReg >= 8)
    9609             pbCodeBuf[off++] = X86_OP_REX_B;
    9610         else if (idxGstTmpReg >= 4)
    9611             pbCodeBuf[off++] = X86_OP_REX;
    9612         pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
    9613         pbCodeBuf[off++] = u8Value;
    9614     }
    9615     /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
    9616     else if (idxGstTmpReg < 4)
    9617     {
    9618         pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
    9619         pbCodeBuf[off++] = u8Value;
    9620     }
    9621     else
    9622     {
    9623         /* ror reg64, 8 */
    9624         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    9625         pbCodeBuf[off++] = 0xc1;
    9626         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    9627         pbCodeBuf[off++] = 8;
    9628 
    9629         /* mov reg8, imm8  */
    9630         if (idxGstTmpReg >= 8)
    9631             pbCodeBuf[off++] = X86_OP_REX_B;
    9632         else if (idxGstTmpReg >= 4)
    9633             pbCodeBuf[off++] = X86_OP_REX;
    9634         pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
    9635         pbCodeBuf[off++] = u8Value;
    9636 
    9637         /* rol reg64, 8 */
    9638         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    9639         pbCodeBuf[off++] = 0xc1;
    9640         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    9641         pbCodeBuf[off++] = 8;
    9642     }
    9643 
    9644 #elif defined(RT_ARCH_ARM64)
    9645     uint8_t const    idxImmReg   = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
    9646     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    9647     if (iGRegEx < 16)
    9648         /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
    9649         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
    9650     else
    9651         /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
    9652         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
    9653     iemNativeRegFreeTmp(pReNative, idxImmReg);
    9654 
    9655 #else
    9656 # error "Port me!"
    9657 #endif
    9658 
    9659     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9660 
    9661     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
    9662 
    9663     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    9664     return off;
    9665 }
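
/* Worked example (editor's sketch): storing 0x42 into CH while guest RCX is
 * shadowed in host r9 takes the rotate path above and emits:
 *      49 c1 c9 08    ror r9, 8
 *      41 b1 42       mov r9b, 0x42
 *      49 c1 c1 08    rol r9, 8
 * Bits 15:8 are rotated down into the low byte, overwritten and rotated back,
 * leaving the other 56 bits of the guest register untouched. */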
    9666 
    9667 
    9668 #define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
    9669     off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
    9670 
    9671 /** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
    9672 DECL_INLINE_THROW(uint32_t)
    9673 iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
    9674 {
    9675     Assert(iGRegEx < 20);
    9676     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    9677 
    9678     /*
    9679      * If it's a constant value (unlikely), we treat this as an
    9680      * IEM_MC_STORE_GREG_U8_CONST statement.
    9681      */
    9682     PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
    9683     if (pValueVar->enmKind == kIemNativeVarKind_Stack)
    9684     { /* likely */ }
    9685     else
    9686     {
    9687         AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
    9688                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    9689         return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
    9690     }
    9691 
    9692     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    9693                                                                  kIemNativeGstRegUse_ForUpdate);
    9694     uint8_t const    idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
    9695 
    9696 #ifdef RT_ARCH_AMD64
    9697     /* To the lowest byte of the register: mov reg8, reg8(r/m) */
    9698     if (iGRegEx < 16)
    9699     {
    9700         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    9701         if (idxGstTmpReg >= 8 || idxVarReg >= 8)
    9702             pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
    9703         else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
    9704             pbCodeBuf[off++] = X86_OP_REX;
    9705         pbCodeBuf[off++] = 0x8a;
    9706         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
    9707     }
    9708     /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
    9709     else if (idxGstTmpReg < 4 && idxVarReg < 4)
    9710     {
    9711         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
    9712         pbCodeBuf[off++] = 0x8a;
    9713         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
    9714     }
    9715     else
    9716     {
    9717         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
    9718 
    9719         /* ror reg64, 8 */
    9720         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    9721         pbCodeBuf[off++] = 0xc1;
    9722         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    9723         pbCodeBuf[off++] = 8;
    9724 
    9725         /* mov reg8, reg8(r/m)  */
    9726         if (idxGstTmpReg >= 8 || idxVarReg >= 8)
    9727             pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
    9728         else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
    9729             pbCodeBuf[off++] = X86_OP_REX;
    9730         pbCodeBuf[off++] = 0x8a;
    9731         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
    9732 
    9733         /* rol reg64, 8 */
    9734         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    9735         pbCodeBuf[off++] = 0xc1;
    9736         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    9737         pbCodeBuf[off++] = 8;
    9738     }
    9739 
    9740 #elif defined(RT_ARCH_ARM64)
    9741     /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
    9742             or
    9743        bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
    9744     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9745     if (iGRegEx < 16)
    9746         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
    9747     else
    9748         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
    9749 
    9750 #else
    9751 # error "Port me!"
    9752 #endif
    9753     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9754 
    9755     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    9756 
    9757     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
    9758     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    9759     return off;
    9760 }
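
/* Editor's note: the `idxGstTmpReg < 4 && idxVarReg < 4` fast path above works
 * because, without a REX prefix, ModRM register encodings 4..7 select
 * AH/CH/DH/BH rather than SPL/BPL/SIL/DIL; hence `idxGstTmpReg + 4` addresses
 * the high byte, and both registers must be below 4 for that encoding to be
 * legal. */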
    9761 
    9762 
    9763 
    9764 #define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
    9765     off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
    9766 
    9767 /** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
    9768 DECL_INLINE_THROW(uint32_t)
    9769 iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
    9770 {
    9771     Assert(iGReg < 16);
    9772     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9773                                                                  kIemNativeGstRegUse_ForUpdate);
    9774 #ifdef RT_ARCH_AMD64
    9775     /* mov reg16, imm16 */
    9776     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    9777     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    9778     if (idxGstTmpReg >= 8)
    9779         pbCodeBuf[off++] = X86_OP_REX_B;
    9780     pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
    9781     pbCodeBuf[off++] = RT_BYTE1(uValue);
    9782     pbCodeBuf[off++] = RT_BYTE2(uValue);
    9783 
    9784 #elif defined(RT_ARCH_ARM64)
    9785     /* movk xdst, #uValue, lsl #0 */
    9786     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9787     pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
    9788 
    9789 #else
    9790 # error "Port me!"
    9791 #endif
    9792 
    9793     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9794 
    9795     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9796     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    9797     return off;
    9798 }
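
/* Editor's note: ARM64 `movk` inserts the 16-bit immediate into bits 15:0 and
 * leaves bits 63:16 alone, which matches the x86 semantics of a 16-bit
 * `mov reg16, imm16` preserving the upper part of the register. */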
    9799 
    9800 
    9801 #define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
    9802     off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
    9803 
    9804 /** Emits code for IEM_MC_STORE_GREG_U16. */
    9805 DECL_INLINE_THROW(uint32_t)
    9806 iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
    9807 {
    9808     Assert(iGReg < 16);
    9809     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    9810 
    9811     /*
    9812      * If it's a constant value (unlikely), we treat this as an
    9813      * IEM_MC_STORE_GREG_U16_CONST statement.
    9814      */
    9815     PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
    9816     if (pValueVar->enmKind == kIemNativeVarKind_Stack)
    9817     { /* likely */ }
    9818     else
    9819     {
    9820         AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
    9821                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    9822         return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
    9823     }
    9824 
    9825     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9826                                                                  kIemNativeGstRegUse_ForUpdate);
    9827 
    9828 #ifdef RT_ARCH_AMD64
    9829     /* mov reg16, reg16 or [mem16] */
    9830     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
    9831     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    9832     if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    9833     {
    9834         if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
    9835             pbCodeBuf[off++] = (idxGstTmpReg      >= 8 ? X86_OP_REX_R : 0)
    9836                              | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
    9837         pbCodeBuf[off++] = 0x8b;
    9838         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
    9839     }
    9840     else
    9841     {
    9842         uint8_t const idxStackSlot = pValueVar->idxStackSlot;
    9843         AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    9844         if (idxGstTmpReg >= 8)
    9845             pbCodeBuf[off++] = X86_OP_REX_R;
    9846         pbCodeBuf[off++] = 0x8b;
    9847         off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
    9848     }
    9849 
    9850 #elif defined(RT_ARCH_ARM64)
    9851     /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
    9852     uint8_t const    idxVarReg   = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
    9853     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9854     pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
    9855     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    9856 
    9857 #else
    9858 # error "Port me!"
    9859 #endif
    9860 
    9861     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9862 
    9863     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9864     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    9865     return off;
    9866 }
    9867 
    9868 
    9869 #define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
    9870     off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
    9871 
    9872 /** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
    9873 DECL_INLINE_THROW(uint32_t)
    9874 iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
    9875 {
    9876     Assert(iGReg < 16);
    9877     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9878                                                                  kIemNativeGstRegUse_ForFullWrite);
    9879     off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
    9880     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9881     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    9882     return off;
    9883 }
    9884 
    9885 
    9886 #define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
    9887     off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
    9888 
    9889 /** Emits code for IEM_MC_STORE_GREG_U32. */
    9890 DECL_INLINE_THROW(uint32_t)
    9891 iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
    9892 {
    9893     Assert(iGReg < 16);
    9894     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    9895 
    9896     /*
    9897      * If it's a constant value (unlikely), we treat this as an
    9898      * IEM_MC_STORE_GREG_U32_CONST statement.
    9899      */
    9900     PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
    9901     if (pValueVar->enmKind == kIemNativeVarKind_Stack)
    9902     { /* likely */ }
    9903     else
    9904     {
    9905         AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
    9906                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    9907         return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
    9908     }
    9909 
    9910     /*
    9911      * For the rest we allocate a guest register for the variable and write
    9912      * it to the CPUMCTX structure.
    9913      */
    9914     uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
    9915     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9916 #ifdef VBOX_STRICT
    9917     off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
    9918 #endif
    9919     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    9920     return off;
    9921 }
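
/* Editor's note: the VBOX_STRICT check above verifies that the host register
 * really has its top 32 bits clear, since the full 64-bit CPUMCTX store relies
 * on the x86 rule that 32-bit operations zero-extend into the high half. */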
    9922 
    9923 
    9924 #define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
    9925     off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
    9926 
    9927 /** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
    9928 DECL_INLINE_THROW(uint32_t)
    9929 iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
    9930 {
    9931     Assert(iGReg < 16);
    9932     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9933                                                                  kIemNativeGstRegUse_ForFullWrite);
    9934     off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
    9935     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9936     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    9937     return off;
    9938 }
    9939 
    9940 
    9941 #define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
    9942     off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
    9943 
    9944 /** Emits code for IEM_MC_STORE_GREG_U64. */
    9945 DECL_INLINE_THROW(uint32_t)
    9946 iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
    9947 {
    9948     Assert(iGReg < 16);
    9949     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    9950 
    9951     /*
    9952      * If it's a constant value (unlikely), we treat this as an
    9953      * IEM_MC_STORE_GREG_U64_CONST statement.
    9954      */
    9955     PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
    9956     if (pValueVar->enmKind == kIemNativeVarKind_Stack)
    9957     { /* likely */ }
    9958     else
    9959     {
    9960         AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
    9961                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    9962         return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
    9963     }
    9964 
    9965     /*
    9966      * For the rest we allocate a guest register for the variable and write
    9967      * it to the CPUMCTX structure.
    9968      */
    9969     uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
    9970     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9971     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    9972     return off;
    9973 }
    9974 
    9975 
    9976 #define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
    9977     off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
    9978 
    9979 /** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
    9980 DECL_INLINE_THROW(uint32_t)
    9981 iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
    9982 {
    9983     Assert(iGReg < 16);
    9984     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9985                                                                  kIemNativeGstRegUse_ForUpdate);
    9986     off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
    9987     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9988     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    9989     return off;
    9990 }
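
/* Editor's note: copying a register onto itself as a 32-bit move (`mov r32, r32`
 * on AMD64, `mov wD, wS` on ARM64) zero-extends the result, so the single
 * instruction above is all it takes to clear bits 63:32. */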
    9991 
    9992 
    9993 /*********************************************************************************************************************************
    9994 *   General purpose register manipulation (add, sub).                                                                            *
    9995 *********************************************************************************************************************************/
    9996 
    9997 #define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
    9998     off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
    9999 
    10000 /** Emits code for IEM_MC_ADD_GREG_U16. */
    10001 DECL_INLINE_THROW(uint32_t)
    10002 iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
    10003 {
    10004     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    10005                                                                  kIemNativeGstRegUse_ForUpdate);
    10006 
    10007 #ifdef RT_ARCH_AMD64
    10008     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
    10009     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    10010     if (idxGstTmpReg >= 8)
    10011         pbCodeBuf[off++] = X86_OP_REX_B;
    10012     if (uAddend == 1)
    10013     {
    10014         pbCodeBuf[off++] = 0xff; /* inc */
    10015         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    10016     }
    10017     else
    10018     {
    10019         pbCodeBuf[off++] = 0x81;
    10020         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    10021         pbCodeBuf[off++] = uAddend;
    10022         pbCodeBuf[off++] = 0;
    10023     }
    10024 
    10025 #else
    10026     uint8_t const    idxTmpReg   = iemNativeRegAllocTmp(pReNative, &off);
    10027     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    10028 
    10029     /* add tmp, gstgrp, uAddend */
    10030     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
    10031 
    10032     /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
    10033     pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
    10034 
    10035     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    10036 #endif
    10037 
    10038     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    10039 
    10040     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    10041 
    10042     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    10043     return off;
    10044 }
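
/* Encoding sketch (hypothetical host register assignment): with the guest
   register in host register r9, the 0x81 path above produces
        66 41 81 c1 05 00   add r9w, 5
   for uAddend=5, while uAddend=1 takes the shorter inc form:
        66 41 ff c1         inc r9w
   The REX.B prefix byte appears only for host registers r8..r15. */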
    10045 
    10046 
    10047 #define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
    10048     off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
    10049 
    10050 #define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
    10051     off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
    10052 
    10053 /** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
    10054 DECL_INLINE_THROW(uint32_t)
    10055 iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
    10056 {
    10057     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    10058                                                                  kIemNativeGstRegUse_ForUpdate);
    10059 
    10060 #ifdef RT_ARCH_AMD64
    10061     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    10062     if (f64Bit)
    10063         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
    10064     else if (idxGstTmpReg >= 8)
    10065         pbCodeBuf[off++] = X86_OP_REX_B;
    10066     if (uAddend == 1)
    10067     {
    10068         pbCodeBuf[off++] = 0xff; /* inc */
    10069         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    10070     }
    10071     else if (uAddend < 128)
    10072     {
    10073         pbCodeBuf[off++] = 0x83; /* add */
    10074         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    10075         pbCodeBuf[off++] = RT_BYTE1(uAddend);
    10076     }
    10077     else
    10078     {
    10079         pbCodeBuf[off++] = 0x81; /* add */
    10080         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    10081         pbCodeBuf[off++] = RT_BYTE1(uAddend);
    10082         pbCodeBuf[off++] = 0;
    10083         pbCodeBuf[off++] = 0;
    10084         pbCodeBuf[off++] = 0;
    10085     }
    10086 
    10087 #else
    10088     /* add gstgrp, gstgrp, uAddend */
    10089     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    10090     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
    10091 
    10092 #endif
    10093 
    10094     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    10095 
    10096     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    10097 
    10098     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    10099     return off;
    10100 }
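
/* Note on the imm8 cutoff above: opcode 0x83 sign-extends its immediate, so it
   is only safe while uAddend is a positive int8_t.  E.g. (hypothetical value)
   uAddend=200 must take the 0x81 form with imm32 0x000000c8; encoding it as
   0x83 with imm8 0xc8 would add the sign-extended value -56 instead. */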
    10101 
    10102 
    10103 
    10104 #define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
    10105     off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
    10106 
    10107 /** Emits code for IEM_MC_SUB_GREG_U16. */
    10108 DECL_INLINE_THROW(uint32_t)
    10109 iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
    10110 {
    10111     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    10112                                                                  kIemNativeGstRegUse_ForUpdate);
    10113 
    10114 #ifdef RT_ARCH_AMD64
    10115     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
    10116     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    10117     if (idxGstTmpReg >= 8)
    10118         pbCodeBuf[off++] = X86_OP_REX_B;
    10119     if (uSubtrahend == 1)
    10120     {
    10121         pbCodeBuf[off++] = 0xff; /* dec */
    10122         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    10123     }
    10124     else
    10125     {
    10126         pbCodeBuf[off++] = 0x81;
    10127         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
    10128         pbCodeBuf[off++] = uSubtrahend;
    10129         pbCodeBuf[off++] = 0;
    10130     }
    10131 
    10132 #else
    10133     uint8_t const    idxTmpReg   = iemNativeRegAllocTmp(pReNative, &off);
    10134     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    10135 
    10136     /* sub tmp, gstgrp, uSubtrahend */
    10137     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
    10138 
    10139     /* bfi gstgrp, tmp, 0, 16 - copies bits 15:0 of the result back into the guest register. */
    10140     pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
    10141 
    10142     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    10143 #endif
    10144 
    10145     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    10146 
    10147     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    10148 
    10149     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    10150     return off;
    10151 }
    10152 
    10153 
    10154 #define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
    10155     off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
    10156 
    10157 #define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
    10158     off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
    10159 
    10160 /** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
    10161 DECL_INLINE_THROW(uint32_t)
    10162 iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
    10163 {
    10164     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    10165                                                                  kIemNativeGstRegUse_ForUpdate);
    10166 
    10167 #ifdef RT_ARCH_AMD64
    10168     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    10169     if (f64Bit)
    10170         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
    10171     else if (idxGstTmpReg >= 8)
    10172         pbCodeBuf[off++] = X86_OP_REX_B;
    10173     if (uSubtrahend == 1)
    10174     {
    10175         pbCodeBuf[off++] = 0xff; /* dec */
    10176         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    10177     }
    10178     else if (uSubtrahend < 128)
    10179     {
    10180         pbCodeBuf[off++] = 0x83; /* sub */
    10181         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
    10182         pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
    10183     }
    10184     else
    10185     {
    10186         pbCodeBuf[off++] = 0x81; /* sub */
    10187         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
    10188         pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
    10189         pbCodeBuf[off++] = 0;
    10190         pbCodeBuf[off++] = 0;
    10191         pbCodeBuf[off++] = 0;
    10192     }
    10193 
    10194 #else
    10195     /* sub gstgrp, gstgrp, uSubtrahend */
    10196     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    10197     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
    10198 
    10199 #endif
    10200 
    10201     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    10202 
    10203     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    10204 
    10205     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    10206     return off;
    10207 }
    10208 
    10209 
    10210 /*********************************************************************************************************************************
    10211 *   Local variable manipulation (add, sub, and, or).                                                                             *
    10212 *********************************************************************************************************************************/
    10213 
    10214 #define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
    10215     off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
    10216 
    10217 #define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
    10218     off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
    10219 
    10220 #define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
    10221     off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
    10222 
    10223 #define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
    10224     off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
    10225 
    10226 /** Emits code for AND'ing a local and a constant value.   */
    10227 DECL_INLINE_THROW(uint32_t)
    10228 iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
    10229 {
    10230 #ifdef VBOX_STRICT
    10231     switch (cbMask)
    10232     {
    10233         case sizeof(uint8_t):  Assert((uint8_t)uMask  == uMask); break;
    10234         case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
    10235         case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
    10236         case sizeof(uint64_t): break;
    10237         default: AssertFailedBreak();
    10238     }
    10239 #endif
    10240 
    10241     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
    10242     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
    10243 
    10244     if (cbMask <= sizeof(uint32_t))
    10245         off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
    10246     else
    10247         off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
    10248 
    10249     iemNativeVarRegisterRelease(pReNative, idxVar);
    10250     return off;
    10251 }
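
/* Expansion sketch (hypothetical microcode statement):
       IEM_MC_AND_LOCAL_U16(u16Tmp, 0xfff);
   becomes
       off = iemNativeEmitAndLocal(pReNative, off, u16Tmp, 0xfff, sizeof(uint16_t));
   which ANDs the host register backing u16Tmp via the 32-bit immediate
   emitter, since the mask fits in 32 bits. */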
    10252 
    10253 
    10254 #define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
    10255     off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
    10256 
    10257 #define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
    10258     off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
    10259 
    10260 #define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
    10261     off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
    10262 
    10263 #define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
    10264     off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
    10265 
    10266 /** Emits code for OR'ing a local and a constant value.   */
    10267 DECL_INLINE_THROW(uint32_t)
    10268 iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
    10269 {
    10270 #ifdef VBOX_STRICT
    10271     switch (cbMask)
    10272     {
    10273         case sizeof(uint8_t):  Assert((uint8_t)uMask  == uMask); break;
    10274         case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
    10275         case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
    10276         case sizeof(uint64_t): break;
    10277         default: AssertFailedBreak();
    10278     }
    10279 #endif
    10280 
    10281     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
    10282     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
    10283 
    10284     if (cbMask <= sizeof(uint32_t))
    10285         off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
    10286     else
    10287         off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
    10288 
    10289     iemNativeVarRegisterRelease(pReNative, idxVar);
    10290     return off;
    10291 }
    10292 
    10293 
    10294 #define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
    10295     off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
    10296 
    10297 #define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
    10298     off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
    10299 
    10300 #define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
    10301     off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
    10302 
    10303 /** Emits code for reversing the byte order in a local value.   */
    10304 DECL_INLINE_THROW(uint32_t)
    10305 iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
    10306 {
    10307     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
    10308     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
    10309 
    10310     switch (cbLocal)
    10311     {
    10312         case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
    10313         case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
    10314         case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg);   break;
    10315         default: AssertFailedBreak();
    10316     }
    10317 
    10318     iemNativeVarRegisterRelease(pReNative, idxVar);
    10319     return off;
    10320 }
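
/* Example (hypothetical value): IEM_MC_BSWAP_LOCAL_U32 on a local holding
   0x12345678 leaves 0x78563412 in the backing host register; the U16 variant
   only swaps the low two bytes. */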
    10321 
    10322 
    10323 
    10324 /*********************************************************************************************************************************
    10325 *   EFLAGS                                                                                                                       *
    10326 *********************************************************************************************************************************/
    10327 
    10328 #if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
    10329 # define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput)     ((void)0)
    10330 #else
    10331 # define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
    10332     iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
    10333 
    10334 DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
    10335 {
    10336     if (fEflOutput)
    10337     {
    10338         PVMCPUCC const pVCpu = pReNative->pVCpu;
    10339 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    10340         IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
    10341         IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
    10342         AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
    10343 #  define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
    10344             if (fEflOutput & (a_fEfl)) \
    10345             { \
    10346                 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
    10347                     STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
    10348                 else \
    10349                     STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
    10350             } else do { } while (0)
    10351 # else
    10352         PCIEMLIVENESSENTRY const pLivenessEntry       = &pReNative->paLivenessEntries[pReNative->idxCurCall];
    10353         IEMLIVENESSBIT const     LivenessClobbered    =
    10354         {
    10355               pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
    10356             & ~(  pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
    10357                 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
    10358                 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
    10359         };
    10360         IEMLIVENESSBIT const     LivenessDelayable =
    10361         {
    10362               pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
    10363             & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
    10364             & ~(  pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
    10365                 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
    10366         };
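        /* Classification sketch (hypothetical liveness input): a flag whose value
           produced here is overwritten before any later read lands in
           LivenessClobbered and its calculation is skippable; one whose value is
           only potentially consumed by a later exception/call path lands in
           LivenessDelayable, i.e. the calculation could be deferred to that rare
           path; anything read normally remains Required. */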
    10367 #  define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
    10368             if (fEflOutput & (a_fEfl)) \
    10369             { \
    10370                 if (LivenessClobbered.a_fLivenessMember) \
    10371                     STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
    10372                 else if (LivenessDelayable.a_fLivenessMember) \
    10373                     STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
    10374                 else \
    10375                     STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
    10376             } else do { } while (0)
    10377 # endif
    10378         CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
    10379         CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
    10380         CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
    10381         CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
    10382         CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
    10383         CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
    10384         //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
    10385 # undef CHECK_FLAG_AND_UPDATE_STATS
    10386     }
    10387     RT_NOREF(fEflInput);
    10388 }
    10389 #endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
    10390 
    10391 #undef  IEM_MC_FETCH_EFLAGS /* should not be used */
    10392 #define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
    10393     off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
    10394 
    10395 /** Handles IEM_MC_FETCH_EFLAGS_EX. */
    10396 DECL_INLINE_THROW(uint32_t)
    10397 iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
    10398                          uint32_t fEflInput, uint32_t fEflOutput)
    10399 {
    10400     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
    10401     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
    10402     RT_NOREF(fEflInput, fEflOutput);
    10403 
    10404 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    10405 # ifdef VBOX_STRICT
    10406     if (   pReNative->idxCurCall != 0
    10407         && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
    10408     {
    10409         PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
    10410         uint32_t const           fBoth          = fEflInput | fEflOutput;
    10411 # define ASSERT_ONE_EFL(a_fEflConst, a_idxField) \
    10412             AssertMsg(   !(fBoth & (a_fEflConst)) \
    10413                       || (!(fEflInput & (a_fEflConst)) \
    10414                           ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
    10415                           : !(fEflOutput & (a_fEflConst)) \
    10416                           ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED(  iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
    10417                           : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
    10418                       ("%s - %u\n", #a_fEflConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
    10419         ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
    10420         ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
    10421         ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
    10422         ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
    10423         ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
    10424         ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
    10425         ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
    10426 # undef ASSERT_ONE_EFL
    10427     }
    10428 # endif
    10429 #endif
    10430 
    10431     /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
    10432      *        the existing shadow copy. */
    10433     uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
    10434     iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
    10435     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
    10436     iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
    10437     return off;
    10438 }
    10439 
    10440 
    10441 
    10442 /** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
    10443  * start using it with custom native code emission (inlining assembly
    10444  * instruction helpers). */
    10445 #undef  IEM_MC_COMMIT_EFLAGS /* should not be used */
    10446 #define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
    10447     IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
    10448     off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
    10449 
    10450 /** Handles IEM_MC_COMMIT_EFLAGS_EX. */
    10451 DECL_INLINE_THROW(uint32_t)
    10452 iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
    10453 {
    10454     RT_NOREF(fEflOutput);
    10455     uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
    10456     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
    10457 
    10458 #ifdef VBOX_STRICT
    10459     off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
    10460     uint32_t offFixup = off;
    10461     off = iemNativeEmitJnzToFixed(pReNative, off, off);
    10462     off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
    10463     iemNativeFixupFixedJump(pReNative, offFixup, off);
    10464 
    10465     off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
    10466     offFixup = off;
    10467     off = iemNativeEmitJzToFixed(pReNative, off, off);
    10468     off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
    10469     iemNativeFixupFixedJump(pReNative, offFixup, off);
    10470 
    10471     /** @todo validate that only bits in the fEflOutput mask changed. */
    10472 #endif
    10473 
    10474     iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
    10475     off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
    10476     iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
    10477     return off;
    10478 }
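
/* Strict-build check sketch (hypothetical committed value): 0x00000246 passes
   both tests above (bit 1, the reserved must-be-one bit, is set and all
   reserved-zero bits are clear), whereas 0x00000244 lacks the must-be-one bit
   and would hit the breakpoint with code 0x2001. */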
    10479 
    10480 
    10481 
    10482 /*********************************************************************************************************************************
    10483 *   Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).                                                               *
    10484 *********************************************************************************************************************************/
    10485 
    10486 #define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
    10487     off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
    10488 
    10489 #define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
    10490     off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
    10491 
    10492 #define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
    10493     off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
    10494 
    10495 
    10496 /** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
    10497  *  IEM_MC_FETCH_SREG_ZX_U64. */
    10498 DECL_INLINE_THROW(uint32_t)
    10499 iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
    10500 {
    10501     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    10502     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
    10503     Assert(iSReg < X86_SREG_COUNT);
    10504 
    10505     /*
    10506      * For now, we will not create a shadow copy of a selector.  The rationale
    10507      * is that since we do not recompile the popping and loading of segment
    10508      * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for pushing
    10509      * and moving to registers, there is only a small chance that the shadow
    10510      * copy will be accessed again before the register is reloaded.  One
    10511      * scenario would be nested calls in 16-bit code, but I doubt it's worth
    10512      * the extra register pressure atm.
    10513      *
    10514      * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
    10515      * and iemNativeVarRegisterAcquire for a load scenario. We only have the
    10516      * store scenario covered at present (r160730).
    10517      */
    10518     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    10519     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    10520     off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
    10521     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    10522     return off;
    10523 }
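
/* Expansion sketch (hypothetical microcode statement): IEM_MC_FETCH_SREG_U16(u16Sel, X86_SREG_SS)
   turns into a 16-bit load of cpum.GstCtx.aSRegs[X86_SREG_SS].Sel into the
   host register backing u16Sel.  The ZX_U32/ZX_U64 forms emit the very same
   load, relying on the 16-bit load helper zero-extending the full register. */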
    10524 
    10525 
    10526 
    10527 /*********************************************************************************************************************************
    10528 *   Register references.                                                                                                         *
    10529 *********************************************************************************************************************************/
    10530 
    10531 #define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
    10532     off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
    10533 
    10534 #define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
    10535     off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
    10536 
    10537 /** Handles IEM_MC_REF_GREG_U8[_CONST]. */
    10538 DECL_INLINE_THROW(uint32_t)
    10539 iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
    10540 {
    10541     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
    10542     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
    10543     Assert(iGRegEx < 20);
    10544 
    10545     if (iGRegEx < 16)
    10546         iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
    10547     else
    10548         iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
    10549 
    10550     /* If we've delayed writing back the register value, flush it now. */
    10551     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
    10552 
    10553     /* If it's not a const reference we need to flush the shadow copy of the register now. */
    10554     if (!fConst)
    10555         iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
    10556 
    10557     return off;
    10558 }
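
/* Encoding note: iGRegEx 0..15 reference the low byte of the 16 GPRs, while
   16..19 reference the legacy high-byte registers; e.g. (hypothetical value)
   iGRegEx=19 produces a kIemNativeGstRegRef_GprHighByte reference to GPR 3,
   i.e. BH. */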
    10559 
    10560 #define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
    10561     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
    10562 
    10563 #define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
    10564     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
    10565 
    10566 #define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
    10567     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
    10568 
    10569 #define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
    10570     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
    10571 
    10572 #define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
    10573     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
    10574 
    10575 #define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
    10576     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
    10577 
    10578 #define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
    10579     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
    10580 
    10581 #define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
    10582     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
    10583 
    10584 #define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
    10585     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
    10586 
    10587 #define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
    10588     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
    10589 
    10590 /** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
    10591 DECL_INLINE_THROW(uint32_t)
    10592 iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
    10593 {
    10594     Assert(iGReg < 16);
    10595     iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
    10596     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
    10597 
    10598     /* If we've delayed writing back the register value, flush it now. */
    10599     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
    10600 
    10601     /* If it's not a const reference we need to flush the shadow copy of the register now. */
    10602     if (!fConst)
    10603         iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
    10604 
    10605     return off;
    10606 }
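
/* Sketch of why the non-const path flushes (hypothetical sequence): after
   IEM_MC_REF_GREG_U64(pu64Dst, X86_GREG_xAX) the consumer may write guest RAX
   through the reference, so any host register still shadowing RAX would turn
   stale; dropping the shadow forces a reload from the context on next use. */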
    10607 
    10608 
    10609 #undef  IEM_MC_REF_EFLAGS /* should not be used. */
    10610 #define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
    10611     IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
    10612     off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
    10613 
    10614 /** Handles IEM_MC_REF_EFLAGS. */
    10615 DECL_INLINE_THROW(uint32_t)
    10616 iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
    10617 {
    10618     iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
    10619     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
    10620 
    10621     /* If we've delayed writing back the register value, flush it now. */
    10622     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
    10623 
    10624     /* If there is a shadow copy of guest EFLAGS, flush it now. */
    10625     iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
    10626 
    10627     return off;
    10628 }
    10629 
    10630 
    10631 /** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds?  Once we emit
    10632  * different code from threaded recompiler, maybe it would be helpful. For now
    10633  * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
    10634 #define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
    10635 
    10636 
    10637 
    10638 /*********************************************************************************************************************************
    10639 *   Effective Address Calculation                                                                                                *
    10640 *********************************************************************************************************************************/
    10641 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
    10642     off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
    10643 
    10644 /** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
    10645  * @sa iemOpHlpCalcRmEffAddrThreadedAddr16  */
    10646 DECL_INLINE_THROW(uint32_t)
    10647 iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    10648                                          uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
    10649 {
    10650     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
    10651 
    10652     /*
    10653      * Handle the disp16 form with no registers first.
    10654      *
    10655      * Convert to an immediate value, as that'll delay the register allocation
    10656      * and assignment till the memory access / call / whatever and we can use
    10657      * a more appropriate register (or none at all).
    10658      */
    10659     if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
    10660     {
    10661         iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
    10662         return off;
    10663     }
    10664 
    10665     /* Determine the displacement. */
    10666     uint16_t u16EffAddr;
    10667     switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    10668     {
    10669         case 0:  u16EffAddr = 0;                        break;
    10670         case 1:  u16EffAddr = (int16_t)(int8_t)u16Disp; break;
    10671         case 2:  u16EffAddr = u16Disp;                  break;
    10672         default: AssertFailedStmt(u16EffAddr = 0);
    10673     }
    10674 
    10675     /* Determine the registers involved. */
    10676     uint8_t idxGstRegBase;
    10677     uint8_t idxGstRegIndex;
    10678     switch (bRm & X86_MODRM_RM_MASK)
    10679     {
    10680         case 0:
    10681             idxGstRegBase  = X86_GREG_xBX;
    10682             idxGstRegIndex = X86_GREG_xSI;
    10683             break;
    10684         case 1:
    10685             idxGstRegBase  = X86_GREG_xBX;
    10686             idxGstRegIndex = X86_GREG_xDI;
    10687             break;
    10688         case 2:
    10689             idxGstRegBase  = X86_GREG_xBP;
    10690             idxGstRegIndex = X86_GREG_xSI;
    10691             break;
    10692         case 3:
    10693             idxGstRegBase  = X86_GREG_xBP;
    10694             idxGstRegIndex = X86_GREG_xDI;
    10695             break;
    10696         case 4:
    10697             idxGstRegBase  = X86_GREG_xSI;
    10698             idxGstRegIndex = UINT8_MAX;
    10699             break;
    10700         case 5:
    10701             idxGstRegBase  = X86_GREG_xDI;
    10702             idxGstRegIndex = UINT8_MAX;
    10703             break;
    10704         case 6:
    10705             idxGstRegBase  = X86_GREG_xBP;
    10706             idxGstRegIndex = UINT8_MAX;
    10707             break;
    10708 #ifdef _MSC_VER  /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
    10709         default:
    10710 #endif
    10711         case 7:
    10712             idxGstRegBase  = X86_GREG_xBX;
    10713             idxGstRegIndex = UINT8_MAX;
    10714             break;
    10715     }
    10716 
    10717     /*
    10718      * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
    10719      */
    10720     uint8_t const idxRegRet   = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    10721     uint8_t const idxRegBase  = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
    10722                                                                kIemNativeGstRegUse_ReadOnly);
    10723     uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
    10724                               ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
    10725                                                                kIemNativeGstRegUse_ReadOnly)
    10726                               : UINT8_MAX;
    10727 #ifdef RT_ARCH_AMD64
    10728     if (idxRegIndex == UINT8_MAX)
    10729     {
    10730         if (u16EffAddr == 0)
    10731         {
    10732             /* movzx ret, base */
    10733             off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
    10734         }
    10735         else
    10736         {
    10737             /* lea ret32, [base64 + disp32] */
    10738             Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
    10739             uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    10740             if (idxRegRet >= 8 || idxRegBase >= 8)
    10741                 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
    10742             pbCodeBuf[off++] = 0x8d;
    10743             if (idxRegBase != X86_GREG_x12 /*SIB*/)
    10744                 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
    10745             else
    10746             {
    10747                 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
    10748                 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
    10749             }
    10750             pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
    10751             pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
    10752             pbCodeBuf[off++] = 0;
    10753             pbCodeBuf[off++] = 0;
    10754             IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    10755 
    10756             off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
    10757         }
    10758     }
    10759     else
    10760     {
    10761         /* lea ret32, [index64 + base64 (+ disp32)] */
    10762         Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
    10763         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    10764         if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
    10765             pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    10766                              | (idxRegBase  >= 8 ? X86_OP_REX_B : 0)
    10767                              | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
    10768         pbCodeBuf[off++] = 0x8d;
    10769         uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
    10770         pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    10771         pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
    10772         if (bMod == X86_MOD_MEM4)
    10773         {
    10774             pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
    10775             pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
    10776             pbCodeBuf[off++] = 0;
    10777             pbCodeBuf[off++] = 0;
    10778         }
    10779         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    10780         off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
    10781     }
    10782 
    10783 #elif defined(RT_ARCH_ARM64)
    10784     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    10785     if (u16EffAddr == 0)
    10786     {
    10787         if (idxRegIndex == UINT8_MAX)
    10788             pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
    10789         else
    10790         {
    10791             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
    10792             pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
    10793         }
    10794     }
    10795     else
    10796     {
    10797         if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
    10798             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
    10799         else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
    10800             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
    10801                                                              (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
    10802         else
    10803         {
    10804             pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
    10805             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
    10806         }
    10807         if (idxRegIndex != UINT8_MAX)
    10808             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
    10809         pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
    10810     }
    10811 
    10812 #else
    10813 # error "port me"
    10814 #endif
    10815 
    10816     if (idxRegIndex != UINT8_MAX)
    10817         iemNativeRegFreeTmp(pReNative, idxRegIndex);
    10818     iemNativeRegFreeTmp(pReNative, idxRegBase);
    10819     iemNativeVarRegisterRelease(pReNative, idxVarRet);
    10820     return off;
    10821 }
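
/* Worked example (hypothetical operands): bRm=0x47 decodes to mod=1, r/m=7,
   i.e. [bx + disp8]; with u16Disp=0x12 the code above fetches guest BX, adds
   0x12 and truncates to 16 bits, so BX=0xfff8 yields GCPtrEff=0x000a. */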
    10822 
    10823 
    10824 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
    10825     off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
    10826 
    10827 /** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
    10828  * @see iemOpHlpCalcRmEffAddrThreadedAddr32  */
    10829 DECL_INLINE_THROW(uint32_t)
    10830 iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    10831                                          uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
    10832 {
    10833     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
    10834 
    10835     /*
    10836      * Handle the disp32 form with no registers first.
    10837      *
    10838      * Convert to an immediate value, as that'll delay the register allocation
    10839      * and assignment till the memory access / call / whatever and we can use
    10840      * a more appropriate register (or none at all).
    10841      */
    10842     if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
    10843     {
    10844         iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
    10845         return off;
    10846     }
    10847 
    10848     /* Calculate the fixed displacement (more on the SIB.B=4 and SIB.B=5 cases further down). */
    10849     uint32_t u32EffAddr = 0;
    10850     switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    10851     {
    10852         case 0: break;
    10853         case 1: u32EffAddr = (int8_t)u32Disp; break;
    10854         case 2: u32EffAddr = u32Disp; break;
    10855         default: AssertFailed();
    10856     }
    10857 
    10858     /* Get the register (or SIB) value. */
    10859     uint8_t idxGstRegBase  = UINT8_MAX;
    10860     uint8_t idxGstRegIndex = UINT8_MAX;
    10861     uint8_t cShiftIndex    = 0;
    10862     switch (bRm & X86_MODRM_RM_MASK)
    10863     {
    10864         case 0: idxGstRegBase = X86_GREG_xAX; break;
    10865         case 1: idxGstRegBase = X86_GREG_xCX; break;
    10866         case 2: idxGstRegBase = X86_GREG_xDX; break;
    10867         case 3: idxGstRegBase = X86_GREG_xBX; break;
    10868         case 4: /* SIB */
    10869         {
    10870             /* index w/ scaling. */
    10871             cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
    10872             switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
    10873             {
    10874                 case 0: idxGstRegIndex = X86_GREG_xAX; break;
    10875                 case 1: idxGstRegIndex = X86_GREG_xCX; break;
    10876                 case 2: idxGstRegIndex = X86_GREG_xDX; break;
    10877                 case 3: idxGstRegIndex = X86_GREG_xBX; break;
    10878                 case 4: cShiftIndex    = 0; /*no index*/ break;
    10879                 case 5: idxGstRegIndex = X86_GREG_xBP; break;
    10880                 case 6: idxGstRegIndex = X86_GREG_xSI; break;
    10881                 case 7: idxGstRegIndex = X86_GREG_xDI; break;
    10882             }
    10883 
    10884             /* base */
    10885             switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
    10886             {
    10887                 case 0: idxGstRegBase = X86_GREG_xAX; break;
    10888                 case 1: idxGstRegBase = X86_GREG_xCX; break;
    10889                 case 2: idxGstRegBase = X86_GREG_xDX; break;
    10890                 case 3: idxGstRegBase = X86_GREG_xBX; break;
    10891                 case 4:
    10892                     idxGstRegBase     = X86_GREG_xSP;
    10893                     u32EffAddr       += uSibAndRspOffset >> 8;
    10894                     break;
    10895                 case 5:
    10896                     if ((bRm & X86_MODRM_MOD_MASK) != 0)
    10897                         idxGstRegBase = X86_GREG_xBP;
    10898                     else
    10899                     {
    10900                         Assert(u32EffAddr == 0);
    10901                         u32EffAddr    = u32Disp;
    10902                     }
    10903                     break;
    10904                 case 6: idxGstRegBase = X86_GREG_xSI; break;
    10905                 case 7: idxGstRegBase = X86_GREG_xDI; break;
    10906             }
    10907             break;
    10908         }
    10909         case 5: idxGstRegBase = X86_GREG_xBP; break;
    10910         case 6: idxGstRegBase = X86_GREG_xSI; break;
    10911         case 7: idxGstRegBase = X86_GREG_xDI; break;
    10912     }
    10913 
    10914     /*
    10915      * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
    10916      * the start of the function.
    10917      */
    10918     if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
    10919     {
    10920         iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
    10921         return off;
    10922     }
    10923 
    10924     /*
    10925      * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
    10926      */
    10927     uint8_t const idxRegRet   = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    10928     uint8_t       idxRegBase  = idxGstRegBase == UINT8_MAX ? UINT8_MAX
    10929                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
    10930                                                                 kIemNativeGstRegUse_ReadOnly);
    10931     uint8_t       idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
    10932                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
    10933                                                                kIemNativeGstRegUse_ReadOnly);
    10934 
    10935     /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
    10936     if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
    10937     {
    10938         idxRegBase  = idxRegIndex;
    10939         idxRegIndex = UINT8_MAX;
    10940     }
    10941 
    10942 #ifdef RT_ARCH_AMD64
    10943     if (idxRegIndex == UINT8_MAX)
    10944     {
    10945         if (u32EffAddr == 0)
    10946         {
    10947             /* mov ret, base */
    10948             off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
    10949         }
    10950         else
    10951         {
    10952             /* lea ret32, [base64 + disp32] */
    10953             Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
    10954             uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    10955             if (idxRegRet >= 8 || idxRegBase >= 8)
    10956                 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
    10957             pbCodeBuf[off++] = 0x8d;
    10958             uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
    10959             if (idxRegBase != X86_GREG_x12 /*SIB*/)
    10960                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
    10961             else
    10962             {
    10963                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    10964                 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
    10965             }
    10966             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    10967             if (bMod == X86_MOD_MEM4)
    10968             {
    10969                 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    10970                 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    10971                 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    10972             }
    10973             IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    10974         }
    10975     }
    10976     else
    10977     {
    10978         Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
    10979         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    10980         if (idxRegBase == UINT8_MAX)
    10981         {
    10982             /* lea ret32, [(index64 << cShiftIndex) + disp32] */
    10983             if (idxRegRet >= 8 || idxRegIndex >= 8)
    10984                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    10985                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
    10986             pbCodeBuf[off++] = 0x8d;
    10987             pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
    10988             pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
    10989             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    10990             pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    10991             pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    10992             pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    10993         }
    10994         else
    10995         {
    10996             /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
    10997             if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
    10998                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    10999                                  | (idxRegBase  >= 8 ? X86_OP_REX_B : 0)
    11000                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
    11001             pbCodeBuf[off++] = 0x8d;
    11002             uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
    11003                                : (int8_t)u32EffAddr == (int32_t)u32EffAddr           ? X86_MOD_MEM1 : X86_MOD_MEM4;
    11004             pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    11005             pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
    11006             if (bMod != X86_MOD_MEM0)
    11007             {
    11008                 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    11009                 if (bMod == X86_MOD_MEM4)
    11010                 {
    11011                     pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    11012                     pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    11013                     pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    11014                 }
    11015             }
    11016         }
    11017         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11018     }
    11019 
    11020 #elif defined(RT_ARCH_ARM64)
    11021     if (u32EffAddr == 0)
    11022     {
    11023         if (idxRegIndex == UINT8_MAX)
    11024             off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
    11025         else if (idxRegBase == UINT8_MAX)
    11026         {
    11027             if (cShiftIndex == 0)
    11028                 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
    11029             else
    11030             {
    11031                 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11032                 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
    11033             }
    11034         }
    11035         else
    11036         {
    11037             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11038             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
    11039                                                           false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
    11040         }
    11041     }
    11042     else
    11043     {
    11044         if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
    11045         {
    11046             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11047             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
    11048         }
    11049         else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
    11050         {
    11051             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11052             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
    11053                                                              (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
    11054         }
    11055         else
    11056         {
    11057             off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
    11058             if (idxRegBase != UINT8_MAX)
    11059             {
    11060                 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11061                 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
    11062             }
    11063         }
    11064         if (idxRegIndex != UINT8_MAX)
    11065         {
    11066             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11067             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
    11068                                                           false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
    11069         }
    11070     }
    11071 
    11072 #else
    11073 # error "port me"
    11074 #endif
    11075 
    11076     if (idxRegIndex != UINT8_MAX)
    11077         iemNativeRegFreeTmp(pReNative, idxRegIndex);
    11078     if (idxRegBase != UINT8_MAX)
    11079         iemNativeRegFreeTmp(pReNative, idxRegBase);
    11080     iemNativeVarRegisterRelease(pReNative, idxVarRet);
    11081     return off;
    11082 }
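
/* Worked example (hypothetical operands): bRm=0x04 (mod=0, r/m=4) with SIB
   byte 0x88 in uSibAndRspOffset decodes to base=EAX, index=ECX, scale=4 and
   no displacement, so the code above emits the equivalent of
       lea ret32, [base64 + index64*4]
   on AMD64, or an add with lsl #2 on ARM64. */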
    11083 
    11084 
    11085 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
    11086     off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
    11087                                                    a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
    11088 
    11089 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
    11090     off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
    11091                                                    a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
    11092 
    11093 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
    11094     off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
    11095                                                    a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
    11096 
    11097 /**
    11098  * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
    11099  *
    11100  * @returns New off.
    11101  * @param   pReNative           The native recompile state.
    11102  * @param   off                 The code buffer offset.
    11103  * @param   bRmEx               The ModRM byte but with bit 3 set to REX.B and
    11104  *                              bit 4 to REX.X.  The two bits are part of the
    11105  *                              REG sub-field, which isn't needed in this
    11106  *                              function.
    11107  * @param   uSibAndRspOffset    Two parts:
    11108  *                                - The first 8 bits make up the SIB byte.
    11109  *                                - The next 8 bits are the fixed RSP/ESP offset
    11110  *                                  in case of a pop [xSP].
    11111  * @param   u32Disp             The displacement byte/word/dword, if any.
    11112  * @param   cbInstr             The size of the fully decoded instruction. Used
    11113  *                              for RIP relative addressing.
    11114  * @param   idxVarRet           The result variable number.
    11115  * @param   f64Bit              Whether to use a 64-bit or 32-bit address size
    11116  *                              when calculating the address.
    11117  *
    11118  * @see iemOpHlpCalcRmEffAddrThreadedAddr64
    11119  */
    11120 DECL_INLINE_THROW(uint32_t)
    11121 iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
    11122                                          uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
    11123 {
    11124     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
    11125 
    11126     /*
    11127      * Special case the rip + disp32 form first.
    11128      */
    11129     if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
    11130     {
    11131         uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    11132         uint8_t const idxRegPc  = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
    11133                                                                   kIemNativeGstRegUse_ReadOnly);
    11134 #ifdef RT_ARCH_AMD64
    11135         if (f64Bit)
    11136         {
    11137             int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
    11138             if ((int32_t)offFinalDisp == offFinalDisp)
    11139                 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
    11140             else
    11141             {
    11142                 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
    11143                 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
    11144             }
    11145         }
    11146         else
    11147             off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
    11148 
    11149 #elif defined(RT_ARCH_ARM64)
    11150         if (f64Bit)
    11151             off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
    11152                                                                  (int64_t)(int32_t)u32Disp + cbInstr);
    11153         else
    11154             off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
    11155                                                                    (int32_t)u32Disp + cbInstr);
    11156 
    11157 #else
    11158 # error "Port me!"
    11159 #endif
    11160         iemNativeRegFreeTmp(pReNative, idxRegPc);
    11161         iemNativeVarRegisterRelease(pReNative, idxVarRet);
    11162         return off;
    11163     }
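    /* Illustrative example (editorial): for a 7 byte "mov rax, [rip+0x10]"
       at guest RIP 0x1000, the effective address is 0x1000 + 7 + 0x10 =
       0x1017, i.e. relative to the *next* instruction -- which is why
       cbInstr is folded into the displacement above. */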
    11164 
    11165     /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
    11166     int64_t i64EffAddr = 0;
    11167     switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    11168     {
    11169         case 0: break;
    11170         case 1: i64EffAddr = (int8_t)u32Disp; break;
    11171         case 2: i64EffAddr = (int32_t)u32Disp; break;
    11172         default: AssertFailed();
    11173     }
    11174 
    11175     /* Get the register (or SIB) value. */
    11176     uint8_t idxGstRegBase  = UINT8_MAX;
    11177     uint8_t idxGstRegIndex = UINT8_MAX;
    11178     uint8_t cShiftIndex    = 0;
    11179     if ((bRmEx & X86_MODRM_RM_MASK) != 4)
    11180         idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
    11181     else /* SIB: */
    11182     {
    11183         /* index w/ scaling. */
    11184         cShiftIndex    = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
    11185         idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
    11186                        | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
    11187         if (idxGstRegIndex == 4)
    11188         {
    11189             /* no index */
    11190             cShiftIndex    = 0;
    11191             idxGstRegIndex = UINT8_MAX;
    11192         }
    11193 
    11194         /* base */
    11195         idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
    11196         if (idxGstRegBase == 4)
    11197         {
    11198             /* pop [rsp] hack */
    11199             i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
    11200         }
    11201         else if (   (idxGstRegBase & X86_SIB_BASE_MASK) == 5
    11202                  && (bRmEx & X86_MODRM_MOD_MASK) == 0)
    11203         {
    11204             /* mod=0 and base=5 -> disp32, no base reg. */
    11205             Assert(i64EffAddr == 0);
    11206             i64EffAddr    = (int32_t)u32Disp;
    11207             idxGstRegBase = UINT8_MAX;
    11208         }
    11209     }
    11210 
    11211     /*
    11212      * If no registers are involved (SIB.B=5, SIB.X=4), repeat what we did at
    11213      * the start of the function.
    11214      */
    11215     if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
    11216     {
    11217         if (f64Bit)
    11218             iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
    11219         else
    11220             iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
    11221         return off;
    11222     }
    11223 
    11224     /*
    11225      * Now emit code that calculates:
    11226      *      idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
    11227      * or if !f64Bit:
    11228      *      idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
    11229      */
    11230     uint8_t const idxRegRet   = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    11231     uint8_t       idxRegBase  = idxGstRegBase == UINT8_MAX ? UINT8_MAX
    11232                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
    11233                                                                 kIemNativeGstRegUse_ReadOnly);
    11234     uint8_t       idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
    11235                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
    11236                                                                kIemNativeGstRegUse_ReadOnly);
    11237 
    11238     /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
    11239     if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
    11240     {
    11241         idxRegBase  = idxRegIndex;
    11242         idxRegIndex = UINT8_MAX;
    11243     }
    11244 
    11245 #ifdef RT_ARCH_AMD64
    11246     uint8_t bFinalAdj;
    11247     if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
    11248         bFinalAdj = 0; /* likely */
    11249     else
    11250     {
    11251         /* pop [rsp] with a problematic disp32 value.  Split out the
    11252            RSP offset and add it separately afterwards (bFinalAdj). */
    11253         /** @todo testcase: pop [rsp] with problematic disp32 (mod4).   */
    11254         Assert(idxGstRegBase == X86_GREG_xSP);
    11255         Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
    11256         bFinalAdj   = (uint8_t)(uSibAndRspOffset >> 8);
    11257         Assert(bFinalAdj != 0);
    11258         i64EffAddr -= bFinalAdj;
    11259         Assert((int32_t)i64EffAddr == i64EffAddr);
    11260     }
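    /* Illustrative example (editorial): with u32Disp = 0x7ffffff8 and a
       16 byte RSP offset from the pop [rsp] hack, i64EffAddr would be
       0x80000008, which no longer fits a signed 32-bit lea displacement;
       subtracting bFinalAdj = 16 here and re-adding it after the lea
       keeps the displacement encodable. */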
    11261     uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
    11262 //pReNative->pInstrBuf[off++] = 0xcc;
    11263 
    11264     if (idxRegIndex == UINT8_MAX)
    11265     {
    11266         if (u32EffAddr == 0)
    11267         {
    11268             /* mov ret, base */
    11269             if (f64Bit)
    11270                 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
    11271             else
    11272                 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
    11273         }
    11274         else
    11275         {
    11276             /* lea ret, [base + disp32] */
    11277             Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
    11278             uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    11279             if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
    11280                 pbCodeBuf[off++] = (idxRegRet  >= 8 ? X86_OP_REX_R : 0)
    11281                                  | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
    11282                                  | (f64Bit          ? X86_OP_REX_W : 0);
    11283             pbCodeBuf[off++] = 0x8d;
    11284             uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
    11285             if (idxRegBase != X86_GREG_x12 /*SIB*/)
    11286                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
    11287             else
    11288             {
    11289                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    11290                 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
    11291             }
    11292             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    11293             if (bMod == X86_MOD_MEM4)
    11294             {
    11295                 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    11296                 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    11297                 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    11298             }
    11299             IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11300         }
    11301     }
    11302     else
    11303     {
    11304         Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
    11305         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    11306         if (idxRegBase == UINT8_MAX)
    11307         {
    11308             /* lea ret, [(index64 << cShiftIndex) + disp32] */
    11309             if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
    11310                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    11311                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
    11312                                  | (f64Bit           ? X86_OP_REX_W : 0);
    11313             pbCodeBuf[off++] = 0x8d;
    11314             pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
    11315             pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
    11316             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    11317             pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    11318             pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    11319             pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    11320         }
    11321         else
    11322         {
    11323             /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
    11324             if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
    11325                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    11326                                  | (idxRegBase  >= 8 ? X86_OP_REX_B : 0)
    11327                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
    11328                                  | (f64Bit           ? X86_OP_REX_W : 0);
    11329             pbCodeBuf[off++] = 0x8d;
    11330             uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
    11331                                : (int8_t)u32EffAddr == (int32_t)u32EffAddr           ? X86_MOD_MEM1 : X86_MOD_MEM4;
    11332             pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    11333             pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
    11334             if (bMod != X86_MOD_MEM0)
    11335             {
    11336                 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    11337                 if (bMod == X86_MOD_MEM4)
    11338                 {
    11339                     pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    11340                     pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    11341                     pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    11342                 }
    11343             }
    11344         }
    11345         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11346     }
    11347 
    11348     if (!bFinalAdj)
    11349     { /* likely */ }
    11350     else
    11351     {
    11352         Assert(f64Bit);
    11353         off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
    11354     }
    11355 
    11356 #elif defined(RT_ARCH_ARM64)
    11357     if (i64EffAddr == 0)
    11358     {
    11359         uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11360         if (idxRegIndex == UINT8_MAX)
    11361             pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
    11362         else if (idxRegBase != UINT8_MAX)
    11363             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
    11364                                                           f64Bit, false /*fSetFlags*/, cShiftIndex);
    11365         else
    11366         {
    11367             Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
    11368             pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
    11369         }
    11370     }
    11371     else
    11372     {
    11373         if (f64Bit)
    11374         { /* likely */ }
    11375         else
    11376             i64EffAddr = (int32_t)i64EffAddr;
    11377 
    11378         if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
    11379         {
    11380             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11381             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
    11382         }
    11383         else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
    11384         {
    11385             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11386             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
    11387         }
    11388         else
    11389         {
    11390             if (f64Bit)
    11391                 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
    11392             else
    11393                 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
    11394             if (idxRegBase != UINT8_MAX)
    11395             {
    11396                 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11397                 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
    11398             }
    11399         }
    11400         if (idxRegIndex != UINT8_MAX)
    11401         {
    11402             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    11403             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
    11404                                                           f64Bit, false /*fSetFlags*/, cShiftIndex);
    11405         }
    11406     }
    11407 
    11408 #else
    11409 # error "port me"
    11410 #endif
    11411 
    11412     if (idxRegIndex != UINT8_MAX)
    11413         iemNativeRegFreeTmp(pReNative, idxRegIndex);
    11414     if (idxRegBase != UINT8_MAX)
    11415         iemNativeRegFreeTmp(pReNative, idxRegBase);
    11416     iemNativeVarRegisterRelease(pReNative, idxVarRet);
    11417     return off;
    11418 }
    11419 
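/*
 * Illustrative sketch (editorial, not part of this changeset): a standalone
 * decoder for the bRmEx / uSibAndRspOffset convention used above -- REX.B in
 * bRmEx bit 3, REX.X in bRmEx bit 4, the SIB byte in uSibAndRspOffset bits
 * 0..7 (the RSP/ESP pop offset in bits 8..15 is left out here).  All names
 * are made up for the example:
 */
#include <stdint.h>

typedef struct EXAMPLESIBDECODE
{
    uint8_t idxBase;    /* 0..15, or UINT8_MAX if no base register */
    uint8_t idxIndex;   /* 0..15, or UINT8_MAX if no index register */
    uint8_t cShift;     /* scale: the index contribution is idxIndex << cShift */
} EXAMPLESIBDECODE;

static EXAMPLESIBDECODE exampleDecodeSib(uint8_t bRmEx, uint32_t uSibAndRspOffset)
{
    EXAMPLESIBDECODE Res = { UINT8_MAX, UINT8_MAX, 0 };
    uint8_t const bSib = (uint8_t)uSibAndRspOffset;

    Res.cShift   = bSib >> 6;                                  /* SIB.scale */
    Res.idxIndex = ((bSib >> 3) & 7) | ((bRmEx & 0x10) >> 1);  /* SIB.index | REX.X */
    if (Res.idxIndex == 4)          /* index=100b w/o REX.X means no index */
    {
        Res.idxIndex = UINT8_MAX;
        Res.cShift   = 0;
    }

    Res.idxBase = (bSib & 7) | (bRmEx & 8);                    /* SIB.base | REX.B */
    if ((bSib & 7) == 5 && (bRmEx & 0xc0) == 0)                /* mod=0, base=101b: disp32, no base */
        Res.idxBase = UINT8_MAX;
    return Res;
}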
    11420 
    11421 /*********************************************************************************************************************************
    11422 *   TLB Lookup.                                                                                                                  *
    11423 *********************************************************************************************************************************/
    11424 
    11425 /**
    11426  * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
    11427  */
    11428 DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
    11429 {
    11430     uint8_t const  iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
    11431     uint8_t const  cbMem   = RT_BYTE2(uSegAndSizeAndAccess);
    11432     uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
    11433     Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
    11434 
    11435     /* Do the lookup manually. */
    11436     RTGCPTR const      GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
    11437     uint64_t const     uTag      = IEMTLB_CALC_TAG(    &pVCpu->iem.s.DataTlb, GCPtrFlat);
    11438     PIEMTLBENTRY const pTlbe     = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
    11439     if (RT_LIKELY(pTlbe->uTag == uTag))
    11440     {
    11441         /*
    11442          * Check TLB page table level access flags.
    11443          */
    11444         AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
    11445         uint64_t const fNoUser          = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
    11446         uint64_t const fNoWriteNoDirty  = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
    11447                                         : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
    11448         uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & (  IEMTLBE_F_PHYS_REV       | IEMTLBE_F_NO_MAPPINGR3
    11449                                                                      | IEMTLBE_F_PG_UNASSIGNED
    11450                                                                      | IEMTLBE_F_PT_NO_ACCESSED
    11451                                                                      | fNoWriteNoDirty          | fNoUser);
    11452         uint64_t const uTlbPhysRev      = pVCpu->iem.s.DataTlb.uTlbPhysRev;
    11453         if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
    11454         {
    11455             /*
    11456              * Return the address.
    11457              */
    11458             uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
    11459             if ((uintptr_t)pbAddr == uResult)
    11460                 return;
    11461             RT_NOREF(cbMem);
    11462             AssertFailed();
    11463         }
    11464         else
    11465             AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
    11466                              fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
    11467     }
    11468     else
    11469         AssertFailed();
    11470     RT_BREAKPOINT();
    11471 }
    11472 
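/*
 * Illustrative sketch (editorial, not part of this changeset): the
 * uSegAndSizeAndAccess argument above packs three fields into a single
 * value -- the segment register in byte 0, the access size in byte 1 and
 * the access flags in the high 16 bits -- matching the RT_BYTE1/RT_BYTE2
 * unpacking.  The packer name is made up for the example:
 */
#include <stdint.h>

static uint32_t examplePackSegSizeAccess(uint8_t iSegReg, uint8_t cbMem, uint16_t fAccess)
{
    return (uint32_t)iSegReg          /* byte 0: segment register (or 0xff for flat) */
         | ((uint32_t)cbMem << 8)     /* byte 1: access size in bytes */
         | ((uint32_t)fAccess << 16); /* high word: access flags */
}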
    11473 /* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
    11474 
    11475 
    11476 /*********************************************************************************************************************************
    11477 *   Memory fetches and stores common                                                                                             *
    11478 *********************************************************************************************************************************/
    11479 
    11480 typedef enum IEMNATIVEMITMEMOP
    11481 {
    11482     kIemNativeEmitMemOp_Store = 0,
    11483     kIemNativeEmitMemOp_Fetch,
    11484     kIemNativeEmitMemOp_Fetch_Zx_U16,
    11485     kIemNativeEmitMemOp_Fetch_Zx_U32,
    11486     kIemNativeEmitMemOp_Fetch_Zx_U64,
    11487     kIemNativeEmitMemOp_Fetch_Sx_U16,
    11488     kIemNativeEmitMemOp_Fetch_Sx_U32,
    11489     kIemNativeEmitMemOp_Fetch_Sx_U64
    11490 } IEMNATIVEMITMEMOP;
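/*
 * Illustrative sketch (editorial, not part of this changeset): the _Zx_ and
 * _Sx_ variants correspond to plain C integer conversions, e.g. for an
 * 8-bit memory operand widened to 64 bits:
 */
#include <stdint.h>

static uint64_t exampleFetchU8ZxU64(uint8_t bValue)
{
    return bValue;                              /* zero extend: 0x80 -> 0x0000000000000080 */
}

static uint64_t exampleFetchU8SxU64(uint8_t bValue)
{
    return (uint64_t)(int64_t)(int8_t)bValue;   /* sign extend: 0x80 -> 0xffffffffffffff80 */
}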
    11491 
    11492 /** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
    11493  * as well as IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
    11494  * (which pass iSegReg = UINT8_MAX). */
    11495 DECL_INLINE_THROW(uint32_t)
    11496 iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off,  uint8_t idxVarValue, uint8_t iSegReg,
    11497                                      uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
    11498                                      uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
    11499 {
    11500     /*
    11501      * Assert sanity.
    11502      */
    11503     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
    11504     PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
    11505     Assert(   enmOp != kIemNativeEmitMemOp_Store
    11506            || pVarValue->enmKind == kIemNativeVarKind_Immediate
    11507            || pVarValue->enmKind == kIemNativeVarKind_Stack);
    11508     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
    11509     PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
    11510     AssertStmt(   pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
    11511                || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
    11512                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    11513     Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
    11514     Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
    11515     AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
    11516 #ifdef VBOX_STRICT
    11517     if (iSegReg == UINT8_MAX)
    11518     {
    11519         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    11520                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    11521                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    11522         switch (cbMem)
    11523         {
    11524             case 1:
    11525                 Assert(   pfnFunction
    11526                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
    11527                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    11528                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    11529                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    11530                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    11531                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
    11532                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
    11533                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
    11534                            : UINT64_C(0xc000b000a0009000) ));
    11535                 break;
    11536             case 2:
    11537                 Assert(   pfnFunction
    11538                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
    11539                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
    11540                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
    11541                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
    11542                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
    11543                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
    11544                            : UINT64_C(0xc000b000a0009000) ));
    11545                 break;
    11546             case 4:
    11547                 Assert(   pfnFunction
    11548                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
    11549                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
    11550                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
    11551                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
    11552                            : UINT64_C(0xc000b000a0009000) ));
    11553                 break;
    11554             case 8:
    11555                 Assert(    pfnFunction
    11556                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
    11557                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
    11558                            : UINT64_C(0xc000b000a0009000) ));
    11559                 break;
    11560         }
    11561     }
    11562     else
    11563     {
    11564         Assert(iSegReg < 6);
    11565         switch (cbMem)
    11566         {
    11567             case 1:
    11568                 Assert(   pfnFunction
    11569                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU8
    11570                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU8
    11571                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
    11572                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
    11573                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
    11574                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
    11575                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
    11576                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
    11577                            : UINT64_C(0xc000b000a0009000) ));
    11578                 break;
    11579             case 2:
    11580                 Assert(   pfnFunction
    11581                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU16
    11582                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU16
    11583                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
    11584                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
    11585                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
    11586                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
    11587                            : UINT64_C(0xc000b000a0009000) ));
    11588                 break;
    11589             case 4:
    11590                 Assert(   pfnFunction
    11591                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU32
    11592                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU32
    11593                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
    11594                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
    11595                            : UINT64_C(0xc000b000a0009000) ));
    11596                 break;
    11597             case 8:
    11598                 Assert(    pfnFunction
    11599                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU64
    11600                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU64
    11601                            : UINT64_C(0xc000b000a0009000) ));
    11602                 break;
    11603         }
    11604     }
    11605 #endif
    11606 
    11607 #ifdef VBOX_STRICT
    11608     /*
    11609      * Check that the fExec flags we've got make sense.
    11610      */
    11611     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    11612 #endif
    11613 
    11614     /*
    11615      * To keep things simple we have to commit any pending writes first as we
    11616      * may end up making calls.
    11617      */
    11618     /** @todo we could postpone this till we make the call and reload the
    11619      * registers after returning from the call. Not sure if that's sensible or
    11620      * not, though. */
    11621     off = iemNativeRegFlushPendingWrites(pReNative, off);
    11622 
    11623 #ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    11624     /*
    11625      * Move/spill/flush stuff out of call-volatile registers.
    11626      * This is the easy way out. We could contain this to the tlb-miss branch
    11627      * by saving and restoring active stuff here.
    11628      */
    11629     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
    11630 #endif
    11631 
    11632     /*
    11633      * Define labels and allocate the result register (trying for the return
    11634      * register if we can).
    11635      */
    11636     uint16_t const uTlbSeqNo         = pReNative->uTlbSeqNo++;
    11637     uint8_t  const idxRegValueFetch  = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
    11638                                      : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
    11639                                      ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
    11640                                      : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
    11641     IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
    11642     uint8_t  const idxRegValueStore  =    !TlbState.fSkip
    11643                                        && enmOp == kIemNativeEmitMemOp_Store
    11644                                        && pVarValue->enmKind != kIemNativeVarKind_Immediate
    11645                                      ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
    11646                                      : UINT8_MAX;
    11647     uint32_t const idxRegMemResult   = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
    11648     uint32_t const idxLabelTlbLookup = !TlbState.fSkip
    11649                                      ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
    11650                                      : UINT32_MAX;
    11651 
    11652     /*
    11653      * Jump to the TLB lookup code.
    11654      */
    11655     if (!TlbState.fSkip)
    11656         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
    11657 
    11658     /*
    11659      * TlbMiss:
    11660      *
    11661      * Call helper to do the fetching or storing.
    11662      * We flush all guest register shadow copies here.
    11663      */
    11664     uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
    11665 
    11666 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    11667     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    11668 #else
    11669     RT_NOREF(idxInstr);
    11670 #endif
    11671 
    11672 #ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    11673     /* Save variables in volatile registers. */
    11674     uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
    11675                                      | (idxRegMemResult  != UINT8_MAX ? RT_BIT_32(idxRegMemResult)  : 0)
    11676                                      | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
    11677     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    11678 #endif
    11679 
    11680     /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
    11681     uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    11682     if (enmOp == kIemNativeEmitMemOp_Store)
    11683     {
    11684         uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
    11685         off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
    11686 #ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    11687                                                         IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    11688 #else
    11689                                                         IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
    11690         fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
    11691 #endif
    11692     }
    11693 
    11694     /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
    11695     off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
    11696 #ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    11697                                                     fVolGregMask);
    11698 #else
    11699                                                     fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
    11700 #endif
    11701 
    11702     if (iSegReg != UINT8_MAX)
    11703     {
    11704         /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
    11705         AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
    11706         off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
    11707     }
    11708 
    11709     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    11710     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    11711 
    11712     /* Done setting up parameters, make the call. */
    11713     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    11714 
    11715     /*
    11716      * Put the result in the right register if this is a fetch.
    11717      */
    11718     if (enmOp != kIemNativeEmitMemOp_Store)
    11719     {
    11720         Assert(idxRegValueFetch == pVarValue->idxReg);
    11721         if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
    11722             off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
    11723     }
    11724 
    11725 #ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    11726     /* Restore variables and guest shadow registers to volatile registers. */
    11727     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
    11728     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
    11729 #endif
    11730 
    11731 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    11732     if (!TlbState.fSkip)
    11733     {
    11734         /* end of TlbMiss - Jump to the done label. */
    11735         uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    11736         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
    11737 
    11738         /*
    11739          * TlbLookup:
    11740          */
    11741         off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
    11742                                            enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
    11743                                            idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
    11744 
    11745         /*
    11746          * Emit code to do the actual storing / fetching.
    11747          */
    11748         PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
    11749 # ifdef VBOX_WITH_STATISTICS
    11750         off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
    11751                                                   enmOp == kIemNativeEmitMemOp_Store
    11752                                                   ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
    11753                                                   : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
    11754 # endif
    11755         switch (enmOp)
    11756         {
    11757             case kIemNativeEmitMemOp_Store:
    11758                 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
    11759                 {
    11760                     switch (cbMem)
    11761                     {
    11762                         case 1:
    11763                             off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
    11764                             break;
    11765                         case 2:
    11766                             off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
    11767                             break;
    11768                         case 4:
    11769                             off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
    11770                             break;
    11771                         case 8:
    11772                             off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
    11773                             break;
    11774                         default:
    11775                             AssertFailed();
    11776                     }
    11777                 }
    11778                 else
    11779                 {
    11780                     switch (cbMem)
    11781                     {
    11782                         case 1:
    11783                             off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
    11784                                                                 idxRegMemResult, TlbState.idxReg1);
    11785                             break;
    11786                         case 2:
    11787                             off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
    11788                                                                  idxRegMemResult, TlbState.idxReg1);
    11789                             break;
    11790                         case 4:
    11791                             off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
    11792                                                                  idxRegMemResult, TlbState.idxReg1);
    11793                             break;
    11794                         case 8:
    11795                             off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
    11796                                                                  idxRegMemResult, TlbState.idxReg1);
    11797                             break;
    11798                         default:
    11799                             AssertFailed();
    11800                     }
    11801                 }
    11802                 break;
    11803 
    11804             case kIemNativeEmitMemOp_Fetch:
    11805             case kIemNativeEmitMemOp_Fetch_Zx_U16:
    11806             case kIemNativeEmitMemOp_Fetch_Zx_U32:
    11807             case kIemNativeEmitMemOp_Fetch_Zx_U64:
    11808                 switch (cbMem)
    11809                 {
    11810                     case 1:
    11811                         off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11812                         break;
    11813                     case 2:
    11814                         off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11815                         break;
    11816                     case 4:
    11817                         off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11818                         break;
    11819                     case 8:
    11820                         off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11821                         break;
    11822                     default:
    11823                         AssertFailed();
    11824                 }
    11825                 break;
    11826 
    11827             case kIemNativeEmitMemOp_Fetch_Sx_U16:
    11828                 Assert(cbMem == 1);
    11829                 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11830                 break;
    11831 
    11832             case kIemNativeEmitMemOp_Fetch_Sx_U32:
    11833                 Assert(cbMem == 1 || cbMem == 2);
    11834                 if (cbMem == 1)
    11835                     off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11836                 else
    11837                     off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11838                 break;
    11839 
    11840             case kIemNativeEmitMemOp_Fetch_Sx_U64:
    11841                 switch (cbMem)
    11842                 {
    11843                     case 1:
    11844                         off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11845                         break;
    11846                     case 2:
    11847                         off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11848                         break;
    11849                     case 4:
    11850                         off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11851                         break;
    11852                     default:
    11853                         AssertFailed();
    11854                 }
    11855                 break;
    11856 
    11857             default:
    11858                 AssertFailed();
    11859         }
    11860 
    11861         iemNativeRegFreeTmp(pReNative, idxRegMemResult);
    11862 
    11863         /*
    11864          * TlbDone:
    11865          */
    11866         iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    11867 
    11868         TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
    11869 
    11870 # ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    11871         /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
    11872         iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    11873 # endif
    11874     }
    11875 #else
    11876     RT_NOREF(fAlignMask, idxLabelTlbMiss);
    11877 #endif
    11878 
    11879     if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
    11880         iemNativeVarRegisterRelease(pReNative, idxVarValue);
    11881     return off;
    11882 }
    11883 
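/*
 * Illustrative sketch (editorial, not part of this changeset) of the native
 * code layout iemNativeEmitMemFetchStoreDataCommon produces, in emission
 * order, when the inline TLB lookup isn't skipped:
 *
 *              jmp     TlbLookup
 *      TlbMiss:
 *              mov     <arg regs>, ...     ; value (stores), GCPtrMem, iSegReg, pVCpu
 *              call    pfnFunction         ; segmented or flat fetch/store helper
 *              jmp     TlbDone
 *      TlbLookup:
 *              <inline TLB probe>          ; branches back to TlbMiss on a miss
 *              <load/store via the host mapping>
 *      TlbDone:
 */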
    11884 
    11885 
    11886 /*********************************************************************************************************************************
    11887 *   Memory fetches (IEM_MEM_FETCH_XXX).                                                                                          *
    11888 *********************************************************************************************************************************/
    11889 
    11890 /* 8-bit segmented: */
    11891 #define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
    11892     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
    11893                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
    11894                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
    11895 
    11896 #define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
    11897     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    11898                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
    11899                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
    11900 
    11901 #define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    11902     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    11903                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    11904                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
    11905 
    11906 #define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    11907     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    11908                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    11909                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
    11910 
    11911 #define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
    11912     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    11913                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
    11914                                                (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
    11915 
    11916 #define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    11917     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    11918                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    11919                                                (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
    11920 
    11921 #define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    11922     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    11923                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    11924                                                (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
    11925 
    11926 /* 16-bit segmented: */
    11927 #define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
    11928     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    11929                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    11930                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
    11931 
    11932 #define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
    11933     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    11934                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    11935                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
    11936 
    11937 #define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    11938     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    11939                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    11940                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
    11941 
    11942 #define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    11943     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    11944                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    11945                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
    11946 
    11947 #define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    11948     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    11949                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    11950                                                (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
    11951 
    11952 #define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    11953     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    11954                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    11955                                                (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
    11956 
    11957 
    11958 /* 32-bit segmented: */
    11959 #define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    11960     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    11961                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    11962                                                (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
    11963 
    11964 #define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
    11965     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    11966                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    11967                                                (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
    11968 
    11969 #define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    11970     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    11971                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    11972                                                (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
    11973 
    11974 #define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    11975     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    11976                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    11977                                                (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
    11978 
    11979 
    11980 /* 64-bit segmented: */
    11981 #define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    11982     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    11983                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
    11984                                                (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
    11985 
    11986 
    11987 
    11988 /* 8-bit flat: */
    11989 #define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
    11990     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
    11991                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
    11992                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    11993 
    11994 #define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
    11995     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    11996                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
    11997                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    11998 
    11999 #define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
    12000     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    12001                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    12002                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    12003 
    12004 #define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
    12005     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    12006                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    12007                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    12008 
    12009 #define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
    12010     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    12011                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
    12012                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
    12013 
    12014 #define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
    12015     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    12016                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    12017                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
    12018 
    12019 #define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
    12020     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    12021                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    12022                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
    12023 
    12024 
    12025 /* 16-bit flat: */
    12026 #define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
    12027     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    12028                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    12029                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
    12030 
    12031 #define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
    12032     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    12033                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    12034                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
    12035 
    12036 #define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
    12037     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    12038                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    12039                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
    12040 
    12041 #define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
    12042     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    12043                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    12044                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
    12045 
    12046 #define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
    12047     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    12048                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    12049                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
    12050 
    12051 #define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
    12052     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    12053                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    12054                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
    12055 
    12056 /* 32-bit flat: */
    12057 #define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
    12058     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    12059                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    12060                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
    12061 
    12062 #define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
    12063     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    12064                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    12065                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
    12066 
    12067 #define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
    12068     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    12069                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    12070                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
    12071 
    12072 #define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
    12073     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    12074                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    12075                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
    12076 
    12077 /* 64-bit flat: */
    12078 #define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
    12079     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    12080                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
    12081                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
    12082 
    12083 
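For illustration, a minimal sketch of what one of these wrappers expands to inside a generated recompiler function; the variable indexes u32Dst and GCPtrEff are hypothetical, while pReNative, off and pCallEntry are the usual recompiler context:

    /* Hypothetical expansion of IEM_MC_FETCH_MEM_U32(u32Dst, X86_SREG_DS, GCPtrEff): */
    off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, u32Dst, X86_SREG_DS, GCPtrEff,
                                               sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch,
                                               (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr);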
    12084 
    12085 /*********************************************************************************************************************************
    12086 *   Memory stores (IEM_MEM_STORE_XXX).                                                                                           *
    12087 *********************************************************************************************************************************/
    12088 
    12089 #define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
    12090     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
    12091                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
    12092                                                (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
    12093 
    12094 #define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
    12095     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
    12096                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
    12097                                                (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
    12098 
    12099 #define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
    12100     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
    12101                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
    12102                                                (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
    12103 
    12104 #define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
    12105     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
    12106                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
    12107                                                (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
    12108 
    12109 
    12110 #define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
    12111     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
    12112                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
    12113                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
    12114 
    12115 #define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
    12116     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
    12117                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
    12118                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
    12119 
    12120 #define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
    12121     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
    12122                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
    12123                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
    12124 
    12125 #define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
    12126     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
    12127                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
    12128                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
    12129 
    12130 
    12131 #define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
    12132     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    12133                                                (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
    12134 
    12135 #define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
    12136     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    12137                                                (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
    12138 
    12139 #define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
    12140     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    12141                                                (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
    12142 
    12143 #define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
    12144     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    12145                                                (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
    12146 
    12147 
    12148 #define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
    12149     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    12150                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
    12151 
    12152 #define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
    12153     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    12154                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
    12155 
    12156 #define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
    12157     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    12158                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
    12159 
    12160 #define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
    12161     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    12162                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
    12163 
    12164 /** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
    12165  *  IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
    12166 DECL_INLINE_THROW(uint32_t)
    12167 iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
    12168                                     uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
    12169 {
    12170     /*
    12171      * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
    12172      * to do the grunt work.
    12173      */
    12174     uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
    12175     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
    12176                                                cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
    12177                                                pfnFunction, idxInstr);
    12178     iemNativeVarFreeLocal(pReNative, idxVarConstValue);
    12179     return off;
    12180 }
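As a usage sketch, IEM_MC_STORE_MEM_U16_CONST(X86_SREG_ES, GCPtrEff, 0x1234) boils down to the call below (GCPtrEff being a hypothetical address variable index); the helper allocates a constant variable, stores it like a regular variable store, and frees it again:

    off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, 0x1234 /*uValueConst*/, X86_SREG_ES, GCPtrEff,
                                               sizeof(uint16_t), (uintptr_t)iemNativeHlpMemStoreDataU16,
                                               pCallEntry->idxInstr);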
    12181 
    12182 
    12183 
    12184 /*********************************************************************************************************************************
    12185 *   Stack Accesses.                                                                                                              *
    12186 *********************************************************************************************************************************/
    12187 /*                                                     RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
    12188 #define IEM_MC_PUSH_U16(a_u16Value) \
    12189     off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16,  0, 0, 0), \
    12190                                  (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
    12191 #define IEM_MC_PUSH_U32(a_u32Value) \
    12192     off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32,  0, 0, 0), \
    12193                                  (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
    12194 #define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
    12195     off = iemNativeEmitStackPush(pReNative, off, a_uSegVal,  RT_MAKE_U32_FROM_U8(32,  0, 1, 0), \
    12196                                  (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
    12197 #define IEM_MC_PUSH_U64(a_u64Value) \
    12198     off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64,  0, 0, 0), \
    12199                                  (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
    12200 
    12201 #define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
    12202     off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
    12203                                  (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
    12204 #define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
    12205     off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
    12206                                  (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
    12207 #define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
    12208     off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
    12209                                  (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
    12210 
    12211 #define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
    12212     off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
    12213                                  (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
    12214 #define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
    12215     off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
    12216                                  (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
    12217 
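To sketch the cBitsVarAndFlat packing convention noted above, taking the IEM_MC_FLAT32_PUSH_U16 case together with the RT_BYTE1/2/3 decoding the emitter performs below:

    uint32_t const cBitsVarAndFlat = RT_MAKE_U32_FROM_U8(16, 32, 0, 0);
    uint8_t  const cbMem     = RT_BYTE1(cBitsVarAndFlat) / 8;  /* 2: pushing a 16-bit value. */
    uint8_t  const cBitsFlat = RT_BYTE2(cBitsVarAndFlat);      /* 32: flat 32-bit stack; 0 would mean segmented. */
    bool     const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0; /* false: not a segment register push. */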
    12218 
    12219 DECL_FORCE_INLINE_THROW(uint32_t)
    12220 iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
    12221 {
    12222     /* Use16BitSp: */
    12223 #ifdef RT_ARCH_AMD64
    12224     off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
    12225     off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
    12226 #else
    12227     /* sub regeff, regrsp, #cbMem */
    12228     pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
    12229     /* and regeff, regeff, #0xffff */
    12230     Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
    12231     pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0,  false /*f64Bit*/);
    12232     /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0. */
    12233     pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
    12234 #endif
    12235     return off;
    12236 }
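In guest terms, both instruction sequences implement the 16-bit stack pointer update sketched here (a simplification; the emitters work on host registers holding RSP and the effective address):

    uint16_t const uNewSp = (uint16_t)(uRsp - cbMem);   /* Decrement SP within 16 bits. */
    uint64_t const uEffSp = uNewSp;                     /* Effective store address, zero extended. */
    uRsp = (uRsp & ~(uint64_t)UINT16_MAX) | uNewSp;     /* Merge the new SP back into RSP bits 15:0. */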
    12237 
    12238 
    12239 DECL_FORCE_INLINE(uint32_t)
    12240 iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
    12241 {
    12242     /* Use32BitSp: */
    12243     off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
    12244     off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
    12245     return off;
    12246 }
    12247 
    12248 
    12249 /** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
    12250 DECL_INLINE_THROW(uint32_t)
    12251 iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
    12252                        uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
    12253 {
    12254     /*
    12255      * Assert sanity.
    12256      */
    12257     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
    12258     PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
    12259 #ifdef VBOX_STRICT
    12260     if (RT_BYTE2(cBitsVarAndFlat) != 0)
    12261     {
    12262         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    12263                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    12264                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    12265         Assert(   pfnFunction
    12266                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
    12267                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
    12268                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
    12269                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
    12270                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
    12271                    : UINT64_C(0xc000b000a0009000) ));
    12272     }
    12273     else
    12274         Assert(   pfnFunction
    12275                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
    12276                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
    12277                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
    12278                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
    12279                    : UINT64_C(0xc000b000a0009000) ));
    12280 #endif
    12281 
    12282 #ifdef VBOX_STRICT
    12283     /*
    12284      * Check that the fExec flags we've got make sense.
    12285      */
    12286     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    12287 #endif
    12288 
    12289     /*
    12290      * To keep things simple we have to commit any pending writes first as we
    12291      * may end up making calls.
    12292      */
    12293     /** @todo we could postpone this till we make the call and reload the
    12294      * registers after returning from the call. Not sure if that's sensible or
    12295      * not, though. */
    12296     off = iemNativeRegFlushPendingWrites(pReNative, off);
    12297 
    12298     /*
    12299      * First we calculate the new RSP and the effective stack pointer value.
    12300      * For 64-bit mode and flat 32-bit these two are the same.
    12301      * (Code structure is very similar to that of PUSH)
    12302      */
    12303     uint8_t const cbMem       = RT_BYTE1(cBitsVarAndFlat) / 8;
    12304     bool const    fIsSegReg   = RT_BYTE3(cBitsVarAndFlat) != 0;
    12305     bool const    fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
    12306     uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
    12307                               ? cbMem : sizeof(uint16_t);
    12308     uint8_t const cBitsFlat   = RT_BYTE2(cBitsVarAndFlat);      RT_NOREF(cBitsFlat);
    12309     uint8_t const idxRegRsp   = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
    12310                                                                 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
    12311     uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
    12312     uint32_t      offFixupJumpToUseOtherBitSp = UINT32_MAX;
    12313     if (cBitsFlat != 0)
    12314     {
    12315         Assert(idxRegEffSp == idxRegRsp);
    12316         Assert(cBitsFlat == 32 || cBitsFlat == 64);
    12317         Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
    12318         if (cBitsFlat == 64)
    12319             off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
    12320         else
    12321             off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
    12322     }
    12323     else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
    12324     {
    12325         Assert(idxRegEffSp != idxRegRsp);
    12326         uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
    12327                                                                      kIemNativeGstRegUse_ReadOnly);
    12328 #ifdef RT_ARCH_AMD64
    12329         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    12330 #else
    12331         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    12332 #endif
    12333         off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
    12334         iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
    12335         offFixupJumpToUseOtherBitSp = off;
    12336         if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
    12337         {
    12338             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
    12339             off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    12340         }
    12341         else
    12342         {
    12343             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
    12344             off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    12345         }
    12346         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    12347     }
    12348     /* SpUpdateEnd: */
    12349     uint32_t const offLabelSpUpdateEnd = off;
    12350 
    12351     /*
    12352      * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or
    12353      * straight to TlbMiss if we're skipping the lookup).
    12354      */
    12355     uint8_t const  iSegReg           = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
    12356     IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
    12357     uint16_t const uTlbSeqNo         = pReNative->uTlbSeqNo++;
    12358     uint32_t const idxLabelTlbMiss   = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
    12359     uint32_t const idxLabelTlbLookup = !TlbState.fSkip
    12360                                      ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
    12361                                      : UINT32_MAX;
    12362     uint8_t const  idxRegValue       =    !TlbState.fSkip
    12363                                        && pVarValue->enmKind != kIemNativeVarKind_Immediate
    12364                                      ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
    12365                                                                    IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
    12366                                      : UINT8_MAX;
    12367     uint8_t const  idxRegMemResult   = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
    12368 
    12369 
    12370     if (!TlbState.fSkip)
    12371         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
    12372     else
    12373         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
    12374 
    12375     /*
    12376      * Use16BitSp:
    12377      */
    12378     if (cBitsFlat == 0)
    12379     {
    12380 #ifdef RT_ARCH_AMD64
    12381         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    12382 #else
    12383         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    12384 #endif
    12385         iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
    12386         if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
    12387             off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    12388         else
    12389             off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    12390         off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
    12391         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    12392     }
    12393 
    12394     /*
    12395      * TlbMiss:
    12396      *
    12397      * Call helper to do the pushing.
    12398      */
    12399     iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
    12400 
    12401 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    12402     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    12403 #else
    12404     RT_NOREF(idxInstr);
    12405 #endif
    12406 
    12407     /* Save variables in volatile registers. */
    12408     uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
    12409                                      | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
    12410                                      | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
    12411                                      | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
    12412     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    12413 
    12414     if (   idxRegValue == IEMNATIVE_CALL_ARG1_GREG
    12415         && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
    12416     {
    12417         /* Swap them using ARG0 as temp register: */
    12418         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
    12419         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
    12420         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
    12421     }
    12422     else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
    12423     {
    12424         /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
    12425         off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
    12426                                                         0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    12427 
    12428         /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
    12429         if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
    12430             off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
    12431     }
    12432     else
    12433     {
    12434         /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
    12435         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
    12436 
    12437         /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
    12438         off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
    12439                                                         IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
    12440     }
    12441 
    12442     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    12443     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    12444 
    12445     /* Done setting up parameters, make the call. */
    12446     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    12447 
    12448     /* Restore variables and guest shadow registers to volatile registers. */
    12449     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
    12450     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
    12451 
    12452 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    12453     if (!TlbState.fSkip)
    12454     {
    12455         /* end of TlbMiss - Jump to the done label. */
    12456         uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    12457         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
    12458 
    12459         /*
    12460          * TlbLookup:
    12461          */
    12462         off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
    12463                                            IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
    12464 
    12465         /*
    12466          * Emit code to do the actual storing / fetching.
    12467          */
    12468         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
    12469 # ifdef VBOX_WITH_STATISTICS
    12470         off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
    12471                                                   RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
    12472 # endif
    12473         if (idxRegValue != UINT8_MAX)
    12474         {
    12475             switch (cbMemAccess)
    12476             {
    12477                 case 2:
    12478                     off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
    12479                     break;
    12480                 case 4:
    12481                     if (!fIsIntelSeg)
    12482                         off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
    12483                     else
    12484                     {
    12485                         /* Intel real mode segment push: the 10890XE adds the 2nd half of EFLAGS to a
    12486                            PUSH FS in real mode, so we have to try to emulate that here.
    12487                            We borrow the now unused idxReg1 from the TLB lookup code here. */
    12488                         uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
    12489                                                                                             kIemNativeGstReg_EFlags);
    12490                         if (idxRegEfl != UINT8_MAX)
    12491                         {
    12492 # ifdef RT_ARCH_AMD64
    12493                             off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
    12494                             off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
    12495                                                              UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
    12496 # else
    12497                             off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
    12498                                                                   off, TlbState.idxReg1, idxRegEfl,
    12499                                                                   UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
    12500 # endif
    12501                             iemNativeRegFreeTmp(pReNative, idxRegEfl);
    12502                         }
    12503                         else
    12504                         {
    12505                             off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
    12506                                                                   RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
    12507                             off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
    12508                                                              UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
    12509                         }
    12510                         /* ASSUMES the upper half of idxRegValue is ZERO. */
    12511                         off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
    12512                         off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
    12513                     }
    12514                     break;
    12515                 case 8:
    12516                     off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
    12517                     break;
    12518                 default:
    12519                     AssertFailed();
    12520             }
    12521         }
    12522         else
    12523         {
    12524             switch (cbMemAccess)
    12525             {
    12526                 case 2:
    12527                     off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
    12528                                                          idxRegMemResult, TlbState.idxReg1);
    12529                     break;
    12530                 case 4:
    12531                     Assert(!fIsSegReg);
    12532                     off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
    12533                                                          idxRegMemResult, TlbState.idxReg1);
    12534                     break;
    12535                 case 8:
    12536                     off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
    12537                     break;
    12538                 default:
    12539                     AssertFailed();
    12540             }
    12541         }
    12542 
    12543         iemNativeRegFreeTmp(pReNative, idxRegMemResult);
    12544         TlbState.freeRegsAndReleaseVars(pReNative);
    12545 
    12546         /*
    12547          * TlbDone:
    12548          *
    12549          * Commit the new RSP value.
    12550          */
    12551         iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    12552     }
    12553 #endif /* IEMNATIVE_WITH_TLB_LOOKUP */
    12554 
    12555     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
    12556     iemNativeRegFreeTmp(pReNative, idxRegRsp);
    12557     if (idxRegEffSp != idxRegRsp)
    12558         iemNativeRegFreeTmp(pReNative, idxRegEffSp);
    12559 
    12560     /* The value variable is implicitly flushed. */
    12561     if (idxRegValue != UINT8_MAX)
    12562         iemNativeVarRegisterRelease(pReNative, idxVarValue);
    12563     iemNativeVarFreeLocal(pReNative, idxVarValue);
    12564 
    12565     return off;
    12566 }
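The Intel quirk handled in the TlbLookup path above (32-bit segment register push in 16-bit/real mode) amounts to the following guest-level value construction, sketched with uSel as the 16-bit selector value (upper bits zero) and fEfl as the current EFLAGS:

    uint32_t const uStored = (fEfl & UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK) | uSel;
    /* ... which is then written as a single 32-bit value at the new stack address. */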
    12567 
    12568 
    12569 
    12570 /*                                                     RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
    12571 #define IEM_MC_POP_GREG_U16(a_iGReg) \
    12572     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16,  0, 0, 0), \
    12573                                     (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
    12574 #define IEM_MC_POP_GREG_U32(a_iGReg) \
    12575     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32,  0, 0, 0), \
    12576                                     (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
    12577 #define IEM_MC_POP_GREG_U64(a_iGReg) \
    12578     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64,  0, 0, 0), \
    12579                                     (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
    12580 
    12581 #define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
    12582     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
    12583                                     (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
    12584 #define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
    12585     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
    12586                                     (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
    12587 
    12588 #define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
    12589     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
    12590                                     (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
    12591 #define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
    12592     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
    12593                                     (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
    12594 
    12595 
    12596 DECL_FORCE_INLINE_THROW(uint32_t)
    12597 iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
    12598                              uint8_t idxRegTmp)
    12599 {
    12600     /* Use16BitSp: */
    12601 #ifdef RT_ARCH_AMD64
    12602     off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
    12603     off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
    12604     RT_NOREF(idxRegTmp);
    12605 #else
    12606     /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
    12607     pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
    12608     /* add tmp, regrsp, #cbMem */
    12609     pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
    12610     /* and tmp, tmp, #0xffff */
    12611     Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
    12612     pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0,  false /*f64Bit*/);
    12613     /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
    12614     pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
    12615 #endif
    12616     return off;
    12617 }
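Guest-level semantics of the helper above, sketched the same way as for the push case:

    uint64_t const uEffSp = (uint16_t)uRsp;             /* Load address is the current SP, zero extended. */
    uint16_t const uNewSp = (uint16_t)(uRsp + cbMem);   /* Increment SP within 16 bits. */
    uRsp = (uRsp & ~(uint64_t)UINT16_MAX) | uNewSp;     /* Merge the new SP back into RSP bits 15:0. */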
    12618 
    12619 
    12620 DECL_FORCE_INLINE(uint32_t)
    12621 iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
    12622 {
    12623     /* Use32BitSp: */
    12624     off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
    12625     off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
    12626     return off;
    12627 }
    12628 
    12629 
    12630 /** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
    12631 DECL_INLINE_THROW(uint32_t)
    12632 iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
    12633                           uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
    12634 {
    12635     /*
    12636      * Assert sanity.
    12637      */
    12638     Assert(idxGReg < 16);
    12639 #ifdef VBOX_STRICT
    12640     if (RT_BYTE2(cBitsVarAndFlat) != 0)
    12641     {
    12642         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    12643                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    12644                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    12645         Assert(   pfnFunction
    12646                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
    12647                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
    12648                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
    12649                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
    12650                    : UINT64_C(0xc000b000a0009000) ));
    12651     }
    12652     else
    12653         Assert(   pfnFunction
    12654                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
    12655                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
    12656                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
    12657                    : UINT64_C(0xc000b000a0009000) ));
    12658 #endif
    12659 
    12660 #ifdef VBOX_STRICT
    12661     /*
    12662      * Check that the fExec flags we've got make sense.
    12663      */
    12664     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    12665 #endif
    12666 
    12667     /*
    12668      * To keep things simple we have to commit any pending writes first as we
    12669      * may end up making calls.
    12670      */
    12671     off = iemNativeRegFlushPendingWrites(pReNative, off);
    12672 
    12673     /*
    12674      * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
    12675      * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
    12676      * directly as the effective stack pointer.
    12677      * (Code structure is very similar to that of PUSH)
    12678      */
    12679     uint8_t const cbMem           = RT_BYTE1(cBitsVarAndFlat) / 8;
    12680     uint8_t const cBitsFlat       = RT_BYTE2(cBitsVarAndFlat);      RT_NOREF(cBitsFlat);
    12681     uint8_t const idxRegRsp       = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
    12682                                                                     kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
    12683     uint8_t const idxRegEffSp     = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
    12684     /** @todo can do a better job picking the register here. For cbMem >= 4 this
    12685      *        will be the resulting register value. */
    12686     uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too.  */
    12687 
    12688     uint32_t      offFixupJumpToUseOtherBitSp = UINT32_MAX;
    12689     if (cBitsFlat != 0)
    12690     {
    12691         Assert(idxRegEffSp == idxRegRsp);
    12692         Assert(cBitsFlat == 32 || cBitsFlat == 64);
    12693         Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
    12694     }
    12695     else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
    12696     {
    12697         Assert(idxRegEffSp != idxRegRsp);
    12698         uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
    12699                                                                      kIemNativeGstRegUse_ReadOnly);
    12700 #ifdef RT_ARCH_AMD64
    12701         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    12702 #else
    12703         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    12704 #endif
    12705         off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
    12706         iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
    12707         offFixupJumpToUseOtherBitSp = off;
    12708         if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
    12709         {
    12710 /** @todo can skip idxRegRsp updating when popping ESP.   */
    12711             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
    12712             off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    12713         }
    12714         else
    12715         {
    12716             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
    12717             off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
    12718         }
    12719         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    12720     }
    12721     /* SpUpdateEnd: */
    12722     uint32_t const offLabelSpUpdateEnd = off;
    12723 
    12724     /*
    12725      * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or
    12726      * straight to TlbMiss if we're skipping the lookup).
    12727      */
    12728     uint8_t const  iSegReg           = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
    12729     IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
    12730     uint16_t const uTlbSeqNo         = pReNative->uTlbSeqNo++;
    12731     uint32_t const idxLabelTlbMiss   = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
    12732     uint32_t const idxLabelTlbLookup = !TlbState.fSkip
    12733                                      ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
    12734                                      : UINT32_MAX;
    12735 
    12736     if (!TlbState.fSkip)
    12737         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
    12738     else
    12739         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
    12740 
    12741     /*
    12742      * Use16BitSp:
    12743      */
    12744     if (cBitsFlat == 0)
    12745     {
    12746 #ifdef RT_ARCH_AMD64
    12747         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    12748 #else
    12749         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    12750 #endif
    12751         iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
    12752         if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
    12753             off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
    12754         else
    12755             off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    12756         off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
    12757         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    12758     }
    12759 
    12760     /*
    12761      * TlbMiss:
    12762      *
    12763      * Call helper to do the popping.
    12764      */
    12765     iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
    12766 
    12767 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    12768     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    12769 #else
    12770     RT_NOREF(idxInstr);
    12771 #endif
    12772 
    12773     uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
    12774                                      | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
    12775                                      | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
    12776     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    12777 
    12778 
    12779     /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
    12780     if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
    12781         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
    12782 
    12783     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    12784     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    12785 
    12786     /* Done setting up parameters, make the call. */
    12787     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    12788 
    12789     /* Move the return register content to idxRegMemResult. */
    12790     if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
    12791         off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
    12792 
    12793     /* Restore variables and guest shadow registers to volatile registers. */
    12794     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
    12795     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
    12796 
    12797 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    12798     if (!TlbState.fSkip)
    12799     {
    12800         /* end of TlbMiss - Jump to the done label. */
    12801         uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    12802         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
    12803 
    12804         /*
    12805          * TlbLookup:
    12806          */
    12807         off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
    12808                                            idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
    12809 
    12810         /*
    12811      * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
    12812          */
    12813         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    12814 # ifdef VBOX_WITH_STATISTICS
    12815         off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
    12816                                                   RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
    12817 # endif
    12818         switch (cbMem)
    12819         {
    12820             case 2:
    12821                 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
    12822                 break;
    12823             case 4:
    12824                 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
    12825                 break;
    12826             case 8:
    12827                 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
    12828                 break;
    12829             default:
    12830                 AssertFailed();
    12831         }
    12832 
    12833         TlbState.freeRegsAndReleaseVars(pReNative);
    12834 
    12835         /*
    12836          * TlbDone:
    12837          *
    12838          * Set the new RSP value (FLAT accesses needs to calculate it first) and
    12839          * commit the popped register value.
    12840          */
    12841         iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    12842     }
    12843 #endif /* IEMNATIVE_WITH_TLB_LOOKUP */
    12844 
    12845     if (idxGReg != X86_GREG_xSP)
    12846     {
    12847         /* Set the register. */
    12848         if (cbMem >= sizeof(uint32_t))
    12849         {
    12850 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    12851             AssertMsg(   pReNative->idxCurCall == 0
    12852                       || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
    12853                       ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
    12854 #endif
    12855             iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult,  IEMNATIVEGSTREG_GPR(idxGReg), off);
    12856             off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
    12857                                                  RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
    12858         }
    12859         else
    12860         {
    12861             Assert(cbMem == sizeof(uint16_t));
    12862             uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
    12863                                                                       kIemNativeGstRegUse_ForUpdate);
    12864             off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
    12865             off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
    12866             iemNativeRegFreeTmp(pReNative, idxRegDst);
    12867         }
    12868 
    12869         /* Complete RSP calculation for FLAT mode. */
    12870         if (idxRegEffSp == idxRegRsp)
    12871         {
    12872             if (cBitsFlat == 64)
    12873                 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
    12874             else
    12875                 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
    12876         }
    12877     }
    12878     else
    12879     {
    12880         /* We're popping RSP, ESP or SP. Only this one takes a bit of extra work, of course. */
    12881         if (cbMem == sizeof(uint64_t))
    12882             off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
    12883         else if (cbMem == sizeof(uint32_t))
    12884             off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
    12885         else
    12886         {
    12887             if (idxRegEffSp == idxRegRsp)
    12888             {
    12889                 if (cBitsFlat == 64)
    12890                     off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
    12891                 else
    12892                     off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
    12893             }
    12894             off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
    12895         }
    12896     }
    12897     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
    12898 
    12899     iemNativeRegFreeTmp(pReNative, idxRegRsp);
    12900     if (idxRegEffSp != idxRegRsp)
    12901         iemNativeRegFreeTmp(pReNative, idxRegEffSp);
    12902     iemNativeRegFreeTmp(pReNative, idxRegMemResult);
    12903 
    12904     return off;
    12905 }
    12906 
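/* Editor's note - illustrative semantics sketch, not part of the original
   source: a 16-bit pop into SP must leave RSP bits 63:16 untouched, i.e. the
   merge emitted above amounts to roughly
       uRsp = (uRsp & ~(uint64_t)0xffff) | u16PoppedValue;
   whereas the 32-bit case zero-extends (loading ESP clears bits 63:32) and
   the 64-bit case replaces the whole register - matching the GprMergeInGpr16,
   LoadGprFromGpr32 and LoadGprFromGpr paths above. */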
    12907 
    12908 
    12909 /*********************************************************************************************************************************
    12910 *   Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX).                                                                      *
    12911 *********************************************************************************************************************************/
    12912 
    12913 #define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12914     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    12915                                     IEM_ACCESS_DATA_ATOMIC,  0 /*fAlignMask*/, \
    12916                                     (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
    12917 
    12918 #define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12919     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    12920                                     IEM_ACCESS_DATA_RW,  0 /*fAlignMask*/, \
    12921                                     (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
    12922 
    12923 #define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12924     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    12925                                     IEM_ACCESS_DATA_W,  0 /*fAlignMask*/, \
    12926                                     (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
    12927 
    12928 #define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12929     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    12930                                     IEM_ACCESS_DATA_R,  0 /*fAlignMask*/, \
    12931                                     (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
    12932 
    12933 
    12934 #define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12935     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    12936                                     IEM_ACCESS_DATA_ATOMIC,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    12937                                     (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
    12938 
    12939 #define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12940     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    12941                                     IEM_ACCESS_DATA_RW,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    12942                                     (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
    12943 
    12944 #define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12945     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    12946                                     IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    12947                                     (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
    12948 
    12949 #define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12950     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    12951                                     IEM_ACCESS_DATA_R,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    12952                                     (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
    12953 
    12954 #define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12955     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
    12956                                     IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    12957                                     (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
    12958 
    12959 
    12960 #define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12961     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    12962                                     IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    12963                                     (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
    12964 
    12965 #define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12966     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    12967                                     IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    12968                                     (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
    12969 
    12970 #define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12971     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    12972                                     IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    12973                                     (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
    12974 
    12975 #define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12976     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    12977                                     IEM_ACCESS_DATA_R,  sizeof(uint32_t) - 1 /*fAlignMask*/, \
    12978                                     (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
    12979 
    12980 #define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12981     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
    12982                                     IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    12983                                     (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
    12984 
    12985 
    12986 #define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12987     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    12988                                     IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    12989                                     (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
    12990 
    12991 #define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12992     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    12993                                     IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    12994                                     (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
    12995 #define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12996     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    12997                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    12998                                     (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
    12999 
    13000 #define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    13001     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    13002                                     IEM_ACCESS_DATA_R,  sizeof(uint64_t) - 1 /*fAlignMask*/, \
    13003                                     (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
    13004 
    13005 #define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    13006     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
    13007                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    13008                                     (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
    13009 
    13010 
    13011 #define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    13012     off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
    13013                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    13014                                     (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
    13015 
    13016 #define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    13017     off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
    13018                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
    13019                                     (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
    13020 
    13021 
    13022 #define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    13023     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
    13024                                     IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    13025                                     (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
    13026 
    13027 #define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    13028     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
    13029                                     IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    13030                                     (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
    13031 
    13032 #define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    13033     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
    13034                                     IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    13035                                     (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
    13036 
    13037 #define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    13038     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
    13039                                     IEM_ACCESS_DATA_R,  sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    13040                                     (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
    13041 
    13042 
    13043 
    13044 #define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    13045     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    13046                                     IEM_ACCESS_DATA_ATOMIC,  0 /*fAlignMask*/, \
    13047                                     (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
    13048 
    13049 #define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    13050     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    13051                                     IEM_ACCESS_DATA_RW,  0 /*fAlignMask*/, \
    13052                                     (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
    13053 
    13054 #define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    13055     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    13056                                     IEM_ACCESS_DATA_W,  0 /*fAlignMask*/, \
    13057                                     (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
    13058 
    13059 #define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    13060     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    13061                                     IEM_ACCESS_DATA_R,  0 /*fAlignMask*/, \
    13062                                     (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
    13063 
    13064 
    13065 #define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    13066     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    13067                                     IEM_ACCESS_DATA_ATOMIC,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    13068                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
    13069 
    13070 #define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    13071     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    13072                                     IEM_ACCESS_DATA_RW,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    13073                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
    13074 
    13075 #define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    13076     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    13077                                     IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    13078                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
    13079 
    13080 #define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    13081     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    13082                                     IEM_ACCESS_DATA_R,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    13083                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
    13084 
    13085 #define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
    13086     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
    13087                                     IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    13088                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
    13089 
    13090 
    13091 #define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    13092     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    13093                                     IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    13094                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
    13095 
    13096 #define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    13097     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    13098                                     IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    13099                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
    13100 
    13101 #define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    13102     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    13103                                     IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    13104                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
    13105 
    13106 #define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    13107     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    13108                                     IEM_ACCESS_DATA_R,  sizeof(uint32_t) - 1 /*fAlignMask*/, \
    13109                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
    13110 
    13111 #define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
    13112     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
    13113                                     IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    13114                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
    13115 
    13116 
    13117 #define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    13118     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    13119                                     IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    13120                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
    13121 
    13122 #define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    13123     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    13124                                     IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    13125                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
    13126 
    13127 #define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    13128     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    13129                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    13130                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
    13131 
    13132 #define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    13133     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    13134                                     IEM_ACCESS_DATA_R,  sizeof(uint64_t) - 1 /*fAlignMask*/, \
    13135                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
    13136 
    13137 #define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
    13138     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
    13139                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    13140                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
    13141 
    13142 
    13143 #define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
    13144     off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
    13145                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    13146                                     (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
    13147 
    13148 #define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
    13149     off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
    13150                                     IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
    13151                                     (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
    13152 
    13153 
    13154 #define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    13155     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
    13156                                     IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    13157                                     (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
    13158 
    13159 #define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    13160     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
    13161                                     IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    13162                                     (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
    13163 
    13164 #define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    13165     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
    13166                                     IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    13167                                     (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
    13168 
    13169 #define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    13170     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
    13171                                     IEM_ACCESS_DATA_R,  sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    13172                                     (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
    13173 
    13174 
    13175 DECL_INLINE_THROW(uint32_t)
    13176 iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
    13177                           uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
    13178                           uintptr_t pfnFunction, uint8_t idxInstr)
    13179 {
    13180     /*
    13181      * Assert sanity.
    13182      */
    13183     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
    13184     PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
    13185     AssertStmt(   pVarMem->enmKind == kIemNativeVarKind_Invalid
    13186                && pVarMem->cbVar   == sizeof(void *),
    13187                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    13188 
    13189     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
    13190     PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
    13191     AssertStmt(   pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
    13192                && pVarUnmapInfo->cbVar   == sizeof(uint8_t),
    13193                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    13194 
    13195     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
    13196     PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
    13197     AssertStmt(   pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
    13198                || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
    13199                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    13200 
    13201     Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
    13202 
    13203     AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
    13204 
    13205 #ifdef VBOX_STRICT
    13206 # define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
    13207         (  ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
    13208          ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
    13209          : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
    13210          ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
    13211 # define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
    13212         (  ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
    13213          ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
    13214          : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
    13215 
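    /* Editor's note - worked example (illustrative only): with fAccess ==
       IEM_ACCESS_DATA_RW the selector above evaluates as
           IEM_MAP_HLP_FN(IEM_ACCESS_DATA_RW, iemNativeHlpMemMapDataU16)
               -> (uintptr_t)iemNativeHlpMemMapDataU16Rw
       while an atomic access (read + write + IEM_ACCESS_ATOMIC) selects the
       ...Atomic variant and a pure read the ...Ro one. */
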
    13216     if (iSegReg == UINT8_MAX)
    13217     {
    13218         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    13219                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    13220                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    13221         switch (cbMem)
    13222         {
    13223             case 1:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
    13224             case 2:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
    13225             case 4:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
    13226             case 8:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
    13227             case 10:
    13228                 Assert(   pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
    13229                        || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
    13230                 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
    13231                 break;
    13232             case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
    13233 # if 0
    13234             case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
    13235             case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
    13236 # endif
    13237             default: AssertFailed(); break;
    13238         }
    13239     }
    13240     else
    13241     {
    13242         Assert(iSegReg < 6);
    13243         switch (cbMem)
    13244         {
    13245             case 1:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
    13246             case 2:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
    13247             case 4:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
    13248             case 8:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
    13249             case 10:
    13250                 Assert(   pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
    13251                        || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
    13252                 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
    13253                 break;
    13254             case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
    13255 # if 0
    13256             case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
    13257             case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
    13258 # endif
    13259             default: AssertFailed(); break;
    13260         }
    13261     }
    13262 # undef IEM_MAP_HLP_FN
    13263 # undef IEM_MAP_HLP_FN_NO_AT
    13264 #endif
    13265 
    13266 #ifdef VBOX_STRICT
    13267     /*
    13268      * Check that the fExec flags we've got make sense.
    13269      */
    13270     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    13271 #endif
    13272 
    13273     /*
    13274      * To keep things simple we have to commit any pending writes first as we
    13275      * may end up making calls.
    13276      */
    13277     off = iemNativeRegFlushPendingWrites(pReNative, off);
    13278 
    13279 #ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    13280     /*
    13281      * Move/spill/flush stuff out of call-volatile registers.
    13282      * This is the easy way out. We could contain this to the tlb-miss branch
    13283      * by saving and restoring active stuff here.
    13284      */
    13285     /** @todo save+restore active registers and maybe guest shadows in tlb-miss.  */
    13286     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
    13287 #endif
    13288 
    13289     /* The bUnmapInfo variable will get a register in the tlb-hit code path,
    13290        while the tlb-miss codepath will temporarily put it on the stack.
    13291        Set the type to stack here so we don't need to do it twice below. */
    13292     iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
    13293     uint8_t const idxRegUnmapInfo   = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
    13294     /** @todo use a tmp register from TlbState, since they'll be free after tlb
    13295      *        lookup is done. */
    13296 
    13297     /*
    13298      * Define labels and allocate the result register (trying for the return
    13299      * register if we can).
    13300      */
    13301     uint16_t const uTlbSeqNo         = pReNative->uTlbSeqNo++;
    13302     uint8_t  const idxRegMemResult   = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
    13303                                      ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
    13304                                      : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
    13305     IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
    13306     uint32_t const idxLabelTlbLookup = !TlbState.fSkip
    13307                                      ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
    13308                                      : UINT32_MAX;
    13309 //off=iemNativeEmitBrk(pReNative, off, 0);
    13310     /*
    13311      * Jump to the TLB lookup code.
    13312      */
    13313     if (!TlbState.fSkip)
    13314         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
    13315 
    13316     /*
    13317      * TlbMiss:
    13318      *
    13319      * Call helper to do the fetching.
    13320      * We flush all guest register shadow copies here.
    13321      */
    13322     uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
    13323 
    13324 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    13325     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    13326 #else
    13327     RT_NOREF(idxInstr);
    13328 #endif
    13329 
    13330 #ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    13331     /* Save variables in volatile registers. */
    13332     uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
    13333     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    13334 #endif
    13335 
    13336     /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
    13337     off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
    13338 #ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    13339                                                     IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
    13340 #else
    13341                                                     IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    13342 #endif
    13343 
    13344     /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
    13345     if (iSegReg != UINT8_MAX)
    13346     {
    13347         AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
    13348         off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
    13349     }
    13350 
    13351     /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
    13352     int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
    13353     off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
    13354 
    13355     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    13356     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    13357 
    13358     /* Done setting up parameters, make the call. */
    13359     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    13360 
    13361     /*
    13362      * Put the output in the right registers.
    13363      */
    13364     Assert(idxRegMemResult == pVarMem->idxReg);
    13365     if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
    13366         off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
    13367 
    13368 #ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    13369     /* Restore variables and guest shadow registers to volatile registers. */
    13370     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
    13371     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
    13372 #endif
    13373 
    13374     Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
    13375     off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
    13376 
    13377 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    13378     if (!TlbState.fSkip)
    13379     {
    13380         /* End of TlbMiss - jump to the done label. */
    13381         uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    13382         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
    13383 
    13384         /*
    13385          * TlbLookup:
    13386          */
    13387         off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
    13388                                            idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
    13389 # ifdef VBOX_WITH_STATISTICS
    13390         off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
    13391                                                 RT_UOFFSETOF(VMCPUCC,  iem.s.StatNativeTlbHitsForMapped));
    13392 # endif
    13393 
    13394         /* [idxVarUnmapInfo] = 0; */
    13395         off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
    13396 
    13397         /*
    13398          * TlbDone:
    13399          */
    13400         iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    13401 
    13402         TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
    13403 
    13404 # ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    13405         /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
    13406         iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    13407 # endif
    13408     }
    13409 #else
    13410     RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
    13411 #endif
    13412 
    13413     iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
    13414     iemNativeVarRegisterRelease(pReNative, idxVarMem);
    13415 
    13416     return off;
    13417 }
    13418 
    13419 
    13420 #define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
    13421     off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
    13422                                          (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
    13423 
    13424 #define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
    13425     off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
    13426                                          (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
    13427 
    13428 #define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
    13429     off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
    13430                                          (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
    13431 
    13432 #define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
    13433     off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
    13434                                          (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
    13435 
    13436 DECL_INLINE_THROW(uint32_t)
    13437 iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
    13438                                uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
    13439 {
    13440     /*
    13441      * Assert sanity.
    13442      */
    13443     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
    13444 #if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
    13445     PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
    13446 #endif
    13447     Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
    13448     Assert(   pVarUnmapInfo->idxReg       < RT_ELEMENTS(pReNative->Core.aHstRegs)
    13449            || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
    13450 #ifdef VBOX_STRICT
    13451     switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
    13452     {
    13453         case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
    13454             Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
    13455         case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
    13456             Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
    13457         case IEM_ACCESS_TYPE_WRITE:
    13458             Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
    13459         case IEM_ACCESS_TYPE_READ:
    13460             Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
    13461         default: AssertFailed();
    13462     }
    13463 #else
    13464     RT_NOREF(fAccess);
    13465 #endif
    13466 
    13467     /*
    13468      * To keep things simple we have to commit any pending writes first as we
    13469      * may end up making calls (there shouldn't be any at this point, so this
    13470      * is just for consistency).
    13471      */
    13472     /** @todo we could postpone this till we make the call and reload the
    13473      * registers after returning from the call. Not sure if that's sensible or
    13474      * not, though. */
    13475     off = iemNativeRegFlushPendingWrites(pReNative, off);
    13476 
    13477     /*
    13478      * Move/spill/flush stuff out of call-volatile registers.
    13479      *
    13480      * We exclude any register holding the bUnmapInfo variable, as we'll be
    13481      * checking it after returning from the call and will free it afterwards.
    13482      */
    13483     /** @todo save+restore active registers and maybe guest shadows in miss
    13484      *        scenario. */
    13485     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
    13486 
    13487     /*
    13488      * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
    13489      * to call the unmap helper function.
    13490      *
    13491      * The likelihood of it being zero is higher than for the TLB hit when doing
    13492      * the mapping, as a TLB miss for a well-aligned and unproblematic memory
    13493      * access should also end up with a mapping that won't need special unmapping.
    13494      */
    13495     /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case!  That
    13496      *        should speed up things for the pure interpreter as well when TLBs
    13497      *        are enabled. */
    13498 #ifdef RT_ARCH_AMD64
    13499     if (pVarUnmapInfo->idxReg == UINT8_MAX)
    13500     {
    13501         /* test byte [rbp - xxx], 0ffh  */
    13502         uint8_t * const pbCodeBuf    = iemNativeInstrBufEnsure(pReNative, off, 7);
    13503         pbCodeBuf[off++] = 0xf6;
    13504         uint8_t const   idxStackSlot = pVarUnmapInfo->idxStackSlot;
    13505         off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
    13506         pbCodeBuf[off++] = 0xff;
    13507         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    13508     }
    13509     else
    13510 #endif
    13511     {
    13512         uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
    13513                                                               true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
    13514         off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
    13515         iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
    13516     }
    13517     uint32_t const offJmpFixup = off;
    13518     off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
    13519 
    13520     /*
    13521      * Call the unmap helper function.
    13522      */
    13523 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
    13524     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    13525 #else
    13526     RT_NOREF(idxInstr);
    13527 #endif
    13528 
    13529     /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
    13530     off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
    13531                                                0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    13532 
    13533     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    13534     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    13535 
    13536     /* Done setting up parameters, make the call. */
    13537     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    13538 
    13539     /* The bUnmapInfo variable is implicitly freed by these MCs. */
    13540     iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
    13541 
    13542     /*
    13543      * Done, just fixup the jump for the non-call case.
    13544      */
    13545     iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
    13546 
    13547     return off;
    13548 }
    13549 
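/* Editor's note - usage sketch, assuming the usual IEM MC conventions; the
   variable names and surrounding statements are hypothetical, not from this
   changeset.  A read-modify-write access pairs a map with a commit-and-unmap:

       IEM_MC_LOCAL(uint16_t *, pu16Dst);
       IEM_MC_LOCAL(uint8_t,    bUnmapInfo);
       IEM_MC_MEM_MAP_U16_RW(pu16Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
       // ... operate on *pu16Dst ...
       IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);

   The recompiler expands these via iemNativeEmitMemMapCommon and
   iemNativeEmitMemCommitAndUnmap above. */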
    13550 
    13551 
    13552 /*********************************************************************************************************************************
    13553 *   State and Exceptions                                                                                                         *
    13554 *********************************************************************************************************************************/
    13555 
    13556 #define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE()     off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    13557 #define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ()       off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
    13558 
    13559 #define IEM_MC_PREPARE_SSE_USAGE()                  off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    13560 #define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE()     off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    13561 #define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ()       off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
    13562 
    13563 #define IEM_MC_PREPARE_AVX_USAGE()                  off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    13564 #define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE()     off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    13565 #define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ()       off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
    13566 
    13567 
    13568 DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
    13569 {
    13570     /** @todo this needs a lot more work later. */
    13571     RT_NOREF(pReNative, fForChange);
    13572     return off;
    13573 }
    13574 
    13575 
    13576 
    13577 /*********************************************************************************************************************************
    13578 *   Emitters for FPU related operations.                                                                                         *
    13579 *********************************************************************************************************************************/
    13580 
    13581 #define IEM_MC_FETCH_FCW(a_u16Fcw) \
    13582     off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
    13583 
    13584 /** Emits code for IEM_MC_FETCH_FCW. */
    13585 DECL_INLINE_THROW(uint32_t)
    13586 iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
    13587 {
    13588     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    13589     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
    13590 
    13591     uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    13592 
    13593     /* Allocate a temporary FCW register. */
    13594     /** @todo eliminate extra register   */
    13595     uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
    13596                                                               kIemNativeGstRegUse_ReadOnly);
    13597 
    13598     off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
    13599 
    13600     /* Free but don't flush the FCW register. */
    13601     iemNativeRegFreeTmp(pReNative, idxFcwReg);
    13602     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    13603 
    13604     return off;
    13605 }
    13606 
    13607 
    13608 #define IEM_MC_FETCH_FSW(a_u16Fsw) \
    13609     off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
    13610 
    13611 /** Emits code for IEM_MC_FETCH_FSW. */
    13612 DECL_INLINE_THROW(uint32_t)
    13613 iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
    13614 {
    13615     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    13616     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
    13617 
    13618     uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
    13619     /* Allocate a temporary FSW register. */
    13620     /** @todo eliminate extra register   */
    13621     uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
    13622                                                               kIemNativeGstRegUse_ReadOnly);
    13623 
    13624     off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
    13625 
    13626     /* Free but don't flush the FSW register. */
    13627     iemNativeRegFreeTmp(pReNative, idxFswReg);
    13628     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    13629 
    13630     return off;
    13631 }
    13632 
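/* Editor's note - usage sketch (illustrative; the exact MC block is elided):
   FNSTSW AX is the typical consumer of IEM_MC_FETCH_FSW, roughly:

       IEM_MC_LOCAL(uint16_t, u16Fsw);
       IEM_MC_FETCH_FSW(u16Fsw);
       IEM_MC_STORE_GREG_U16(X86_GREG_xAX, u16Fsw);
*/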
    13633 
    13634 
    13635 /*********************************************************************************************************************************
    13636 *   The native code generator functions for each MC block.                                                                       *
    13637 *********************************************************************************************************************************/
    13638 
    13639 
    13640 /*
    13641  * Include g_apfnIemNativeRecompileFunctions and associated functions.
    13642  *
    13643  * This should probably live in its own file later, but let's see what the
    13644  * compile times turn out to be first.
    13645  */
    13646 #include "IEMNativeFunctions.cpp.h"
    13647 
    13648 
    13649 
    13650 /*********************************************************************************************************************************
    13651 *   Recompiler Core.                                                                                                             *
    13652 *********************************************************************************************************************************/
    13653 
    13654 
    13655 /** @callback_method_impl{FNDISREADBYTES, Dummy.} */
    13656 static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
    13657 {
    13658     RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
    13659     pDis->cbCachedInstr += cbMaxRead;
    13660     RT_NOREF(cbMinRead);
    13661     return VERR_NO_DATA;
    13662 }
    13663 
    13664 
    13665 DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
    13666 {
    13667     static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
    13668     {
    13669 #define ENTRY(a_Member) { RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member }
    13670         ENTRY(fLocalForcedActions),
    13671         ENTRY(iem.s.rcPassUp),
    13672         ENTRY(iem.s.fExec),
    13673         ENTRY(iem.s.pbInstrBuf),
    13674         ENTRY(iem.s.uInstrBufPc),
    13675         ENTRY(iem.s.GCPhysInstrBuf),
    13676         ENTRY(iem.s.cbInstrBufTotal),
    13677         ENTRY(iem.s.idxTbCurInstr),
    13678 #ifdef VBOX_WITH_STATISTICS
    13679         ENTRY(iem.s.StatNativeTlbHitsForFetch),
    13680         ENTRY(iem.s.StatNativeTlbHitsForStore),
    13681         ENTRY(iem.s.StatNativeTlbHitsForStack),
    13682         ENTRY(iem.s.StatNativeTlbHitsForMapped),
    13683         ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
    13684         ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
    13685         ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
    13686         ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
    13687 #endif
    13688         ENTRY(iem.s.DataTlb.aEntries),
    13689         ENTRY(iem.s.DataTlb.uTlbRevision),
    13690         ENTRY(iem.s.DataTlb.uTlbPhysRev),
    13691         ENTRY(iem.s.DataTlb.cTlbHits),
    13692         ENTRY(iem.s.CodeTlb.aEntries),
    13693         ENTRY(iem.s.CodeTlb.uTlbRevision),
    13694         ENTRY(iem.s.CodeTlb.uTlbPhysRev),
    13695         ENTRY(iem.s.CodeTlb.cTlbHits),
    13696         ENTRY(pVMR3),
    13697         ENTRY(cpum.GstCtx.rax),
    13698         ENTRY(cpum.GstCtx.ah),
    13699         ENTRY(cpum.GstCtx.rcx),
    13700         ENTRY(cpum.GstCtx.ch),
    13701         ENTRY(cpum.GstCtx.rdx),
    13702         ENTRY(cpum.GstCtx.dh),
    13703         ENTRY(cpum.GstCtx.rbx),
    13704         ENTRY(cpum.GstCtx.bh),
    13705         ENTRY(cpum.GstCtx.rsp),
    13706         ENTRY(cpum.GstCtx.rbp),
    13707         ENTRY(cpum.GstCtx.rsi),
    13708         ENTRY(cpum.GstCtx.rdi),
    13709         ENTRY(cpum.GstCtx.r8),
    13710         ENTRY(cpum.GstCtx.r9),
    13711         ENTRY(cpum.GstCtx.r10),
    13712         ENTRY(cpum.GstCtx.r11),
    13713         ENTRY(cpum.GstCtx.r12),
    13714         ENTRY(cpum.GstCtx.r13),
    13715         ENTRY(cpum.GstCtx.r14),
    13716         ENTRY(cpum.GstCtx.r15),
    13717         ENTRY(cpum.GstCtx.es.Sel),
    13718         ENTRY(cpum.GstCtx.es.u64Base),
    13719         ENTRY(cpum.GstCtx.es.u32Limit),
    13720         ENTRY(cpum.GstCtx.es.Attr),
    13721         ENTRY(cpum.GstCtx.cs.Sel),
    13722         ENTRY(cpum.GstCtx.cs.u64Base),
    13723         ENTRY(cpum.GstCtx.cs.u32Limit),
    13724         ENTRY(cpum.GstCtx.cs.Attr),
    13725         ENTRY(cpum.GstCtx.ss.Sel),
    13726         ENTRY(cpum.GstCtx.ss.u64Base),
    13727         ENTRY(cpum.GstCtx.ss.u32Limit),
    13728         ENTRY(cpum.GstCtx.ss.Attr),
    13729         ENTRY(cpum.GstCtx.ds.Sel),
    13730         ENTRY(cpum.GstCtx.ds.u64Base),
    13731         ENTRY(cpum.GstCtx.ds.u32Limit),
    13732         ENTRY(cpum.GstCtx.ds.Attr),
    13733         ENTRY(cpum.GstCtx.fs.Sel),
    13734         ENTRY(cpum.GstCtx.fs.u64Base),
    13735         ENTRY(cpum.GstCtx.fs.u32Limit),
    13736         ENTRY(cpum.GstCtx.fs.Attr),
    13737         ENTRY(cpum.GstCtx.gs.Sel),
    13738         ENTRY(cpum.GstCtx.gs.u64Base),
    13739         ENTRY(cpum.GstCtx.gs.u32Limit),
    13740         ENTRY(cpum.GstCtx.gs.Attr),
    13741         ENTRY(cpum.GstCtx.rip),
    13742         ENTRY(cpum.GstCtx.eflags),
    13743         ENTRY(cpum.GstCtx.uRipInhibitInt),
    13744 #undef ENTRY
    13745     };
    13746 #ifdef VBOX_STRICT
    13747     static bool s_fOrderChecked = false;
    13748     if (!s_fOrderChecked)
    13749     {
    13750         s_fOrderChecked = true;
    13751         uint32_t offPrev = s_aMembers[0].off;
    13752         for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
    13753         {
    13754             Assert(s_aMembers[i].off > offPrev);
    13755             offPrev = s_aMembers[i].off;
    13756         }
    13757     }
    13758 #endif
    13759 
    13760     /*
    13761      * Binary lookup.
    13762      */
    13763     unsigned iStart = 0;
    13764     unsigned iEnd   = RT_ELEMENTS(s_aMembers);
    13765     for (;;)
    13766     {
    13767         unsigned const iCur   = iStart + (iEnd - iStart) / 2;
    13768         uint32_t const offCur = s_aMembers[iCur].off;
    13769         if (off < offCur)
    13770         {
    13771             if (iCur != iStart)
    13772                 iEnd = iCur;
    13773             else
    13774                 break;
    13775         }
    13776         else if (off > offCur)
    13777         {
    13778             if (iCur + 1 < iEnd)
    13779                 iStart = iCur + 1;
    13780             else
    13781                 break;
    13782         }
    13783         else
    13784             return s_aMembers[iCur].pszName;
    13785     }
    13786 #ifdef VBOX_WITH_STATISTICS
    13787     if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
    13788         return "iem.s.acThreadedFuncStats[iFn]";
    13789 #endif
    13790     return NULL;
    13791 }
    13792 
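The helper above maps a byte offset within VMCPUCC to a member name via a binary search over the offset-sorted s_aMembers table (the VBOX_STRICT block verifies the sort order once). A minimal, self-contained sketch of the same half-open binary search, with hypothetical MYENTRY/myLookup names:

    #include <stddef.h>

    /* Hedged sketch only: the same half-open binary search as above, over a
       table sorted ascending by 'off'.  MYENTRY and myLookup are illustrative. */
    typedef struct MYENTRY { unsigned off; const char *pszName; } MYENTRY;

    static const char *myLookup(const MYENTRY *paSorted, unsigned cEntries, unsigned off)
    {
        unsigned iStart = 0;
        unsigned iEnd   = cEntries;             /* half-open range [iStart, iEnd) */
        while (iStart < iEnd)
        {
            unsigned const iCur = iStart + (iEnd - iStart) / 2;
            if (off < paSorted[iCur].off)
                iEnd = iCur;                    /* keep searching the lower half */
            else if (off > paSorted[iCur].off)
                iStart = iCur + 1;              /* keep searching the upper half */
            else
                return paSorted[iCur].pszName;  /* exact hit */
        }
        return NULL;                            /* no member starts at 'off' */
    }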
    13793 
    13794 /**
    13795  * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
    13796  * @returns pszBuf.
    13797  * @param   fFlags  The flags.
    13798  * @param   pszBuf  The output buffer.
    13799  * @param   cbBuf   The output buffer size.  At least 32 bytes.
    13800  */
    13801 DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
    13802 {
    13803     Assert(cbBuf >= 32);
    13804     static RTSTRTUPLE const s_aModes[] =
    13805     {
    13806         /* [00] = */ { RT_STR_TUPLE("16BIT") },
    13807         /* [01] = */ { RT_STR_TUPLE("32BIT") },
    13808         /* [02] = */ { RT_STR_TUPLE("!2!") },
    13809         /* [03] = */ { RT_STR_TUPLE("!3!") },
    13810         /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
    13811         /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
    13812         /* [06] = */ { RT_STR_TUPLE("!6!") },
    13813         /* [07] = */ { RT_STR_TUPLE("!7!") },
    13814         /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
    13815         /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
    13816         /* [0a] = */ { RT_STR_TUPLE("64BIT") },
    13817         /* [0b] = */ { RT_STR_TUPLE("!b!") },
    13818         /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
    13819         /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
    13820         /* [0e] = */ { RT_STR_TUPLE("!e!") },
    13821         /* [0f] = */ { RT_STR_TUPLE("!f!") },
    13822         /* [10] = */ { RT_STR_TUPLE("!10!") },
    13823         /* [11] = */ { RT_STR_TUPLE("!11!") },
    13824         /* [12] = */ { RT_STR_TUPLE("!12!") },
    13825         /* [13] = */ { RT_STR_TUPLE("!13!") },
    13826         /* [14] = */ { RT_STR_TUPLE("!14!") },
    13827         /* [15] = */ { RT_STR_TUPLE("!15!") },
    13828         /* [16] = */ { RT_STR_TUPLE("!16!") },
    13829         /* [17] = */ { RT_STR_TUPLE("!17!") },
    13830         /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
    13831         /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
    13832         /* [1a] = */ { RT_STR_TUPLE("!1a!") },
    13833         /* [1b] = */ { RT_STR_TUPLE("!1b!") },
    13834         /* [1c] = */ { RT_STR_TUPLE("!1c!") },
    13835         /* [1d] = */ { RT_STR_TUPLE("!1d!") },
    13836         /* [1e] = */ { RT_STR_TUPLE("!1e!") },
    13837         /* [1f] = */ { RT_STR_TUPLE("!1f!") },
    13838     };
    13839     AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
    13840     memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
    13841     size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
    13842 
    13843     pszBuf[off++] = ' ';
    13844     pszBuf[off++] = 'C';
    13845     pszBuf[off++] = 'P';
    13846     pszBuf[off++] = 'L';
    13847     pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
    13848     Assert(off < 32);
    13849 
    13850     fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
    13851 
    13852     static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
    13853     {
    13854         { RT_STR_TUPLE("BYPASS_HANDLERS"),      IEM_F_BYPASS_HANDLERS    },
    13855         { RT_STR_TUPLE("PENDING_BRK_INSTR"),    IEM_F_PENDING_BRK_INSTR  },
    13856         { RT_STR_TUPLE("PENDING_BRK_DATA"),     IEM_F_PENDING_BRK_DATA   },
    13857         { RT_STR_TUPLE("PENDING_BRK_X86_IO"),   IEM_F_PENDING_BRK_X86_IO },
    13858         { RT_STR_TUPLE("X86_DISREGARD_LOCK"),   IEM_F_X86_DISREGARD_LOCK },
    13859         { RT_STR_TUPLE("X86_CTX_VMX"),          IEM_F_X86_CTX_VMX        },
    13860         { RT_STR_TUPLE("X86_CTX_SVM"),          IEM_F_X86_CTX_SVM        },
    13861         { RT_STR_TUPLE("X86_CTX_IN_GUEST"),     IEM_F_X86_CTX_IN_GUEST   },
    13862         { RT_STR_TUPLE("X86_CTX_SMM"),          IEM_F_X86_CTX_SMM        },
    13863         { RT_STR_TUPLE("INHIBIT_SHADOW"),       IEMTB_F_INHIBIT_SHADOW   },
    13864         { RT_STR_TUPLE("INHIBIT_NMI"),          IEMTB_F_INHIBIT_NMI      },
    13865         { RT_STR_TUPLE("CS_LIM_CHECKS"),        IEMTB_F_CS_LIM_CHECKS    },
    13866         { RT_STR_TUPLE("TYPE_THREADED"),        IEMTB_F_TYPE_THREADED    },
    13867         { RT_STR_TUPLE("TYPE_NATIVE"),          IEMTB_F_TYPE_NATIVE      },
    13868     };
    13869     if (fFlags)
    13870         for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
    13871             if (s_aFlags[i].fFlag & fFlags)
    13872             {
    13873                 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
    13874                 pszBuf[off++] = ' ';
    13875                 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
    13876                 off += s_aFlags[i].cchName;
    13877                 fFlags &= ~s_aFlags[i].fFlag;
    13878                 if (!fFlags)
    13879                     break;
    13880             }
    13881     pszBuf[off] = '\0';
    13882 
    13883     return pszBuf;
    13884 }
    13885 
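A hedged usage sketch for the formatter above; the buffer size and the Log statement are illustrative, the only hard requirement visible in the code being cbBuf >= 32:

    /* Hedged usage sketch: format a TB's flags for logging.  pTb is assumed
       to be a valid PCIEMTB, as in iemNativeDisassembleTb below. */
    char szFlags[128];
    Log(("TB %p: fFlags=%#010x %s\n", pTb, pTb->fFlags,
         iemTbFlagsToString(pTb->fFlags, szFlags, sizeof(szFlags))));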
    13886 
    13887 DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
    13888 {
    13889     AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
    13890 #if defined(RT_ARCH_AMD64)
    13891     static const char * const a_apszMarkers[] =
    13892     {
    13893         /*[0]=*/ "unknown0",        "CheckCsLim",           "ConsiderLimChecking",  "CheckOpcodes",
    13894         /*[4]=*/ "PcAfterBranch",   "LoadTlbForNewPage",    "LoadTlbAfterBranch"
    13895     };
    13896 #endif
    13897 
    13898     char                    szDisBuf[512];
    13899     DISSTATE                Dis;
    13900     PCIEMNATIVEINSTR const  paNative      = pTb->Native.paInstructions;
    13901     uint32_t const          cNative       = pTb->Native.cInstructions;
    13902     uint32_t                offNative     = 0;
    13903 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    13904     PCIEMTBDBG const        pDbgInfo      = pTb->pDbgInfo;
    13905 #endif
    13906     DISCPUMODE              enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
    13907                                           : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
    13908                                           :                                                            DISCPUMODE_64BIT;
    13909 #if   defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
    13910     DISCPUMODE const        enmHstCpuMode = DISCPUMODE_64BIT;
    13911 #elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
    13912     DISCPUMODE const        enmHstCpuMode = DISCPUMODE_ARMV8_A64;
    13913 #elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
    13914 # error "Port me"
    13915 #else
    13916     csh                     hDisasm       = ~(size_t)0;
    13917 # if defined(RT_ARCH_AMD64)
    13918     cs_err                  rcCs          = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
    13919 # elif defined(RT_ARCH_ARM64)
    13920     cs_err                  rcCs          = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
    13921 # else
    13922 #  error "Port me"
    13923 # endif
    13924     AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
    13925 #endif
    13926 
    13927     /*
    13928      * Print TB info.
    13929      */
    13930     pHlp->pfnPrintf(pHlp,
    13931                     "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
    13932                     "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
    13933                     pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
    13934                     pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
    13935 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    13936     if (pDbgInfo && pDbgInfo->cEntries > 1)
    13937     {
    13938         Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
    13939 
    13940         /*
    13941          * This disassembly is driven by the debug info that follows the native
    13942          * code, indicating where the next guest instruction starts, where the
    13943          * labels are, and other such things.
    13944          */
    13945         uint32_t                idxThreadedCall  = 0;
    13946         uint32_t                fExec            = pTb->fFlags & UINT32_C(0x00ffffff);
    13947         uint8_t                 idxRange         = UINT8_MAX;
    13948         uint8_t const           cRanges          = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
    13949         uint32_t                offRange         = 0;
    13950         uint32_t                offOpcodes       = 0;
    13951         uint32_t const          cbOpcodes        = pTb->cbOpcodes;
    13952         RTGCPHYS                GCPhysPc         = pTb->GCPhysPc;
    13953         uint32_t const          cDbgEntries      = pDbgInfo->cEntries;
    13954         uint32_t                iDbgEntry        = 1;
    13955         uint32_t                offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
    13956 
    13957         while (offNative < cNative)
    13958         {
    13959             /* If we're at or have passed the point where the next chunk of debug
    13960                info starts, process it. */
    13961             if (offDbgNativeNext <= offNative)
    13962             {
    13963                 offDbgNativeNext = UINT32_MAX;
    13964                 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
    13965                 {
    13966                     switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
    13967                     {
    13968                         case kIemTbDbgEntryType_GuestInstruction:
    13969                         {
    13970                             /* Did the exec flag change? */
    13971                             if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
    13972                             {
    13973                                 pHlp->pfnPrintf(pHlp,
    13974                                                 "  fExec change %#08x -> %#08x %s\n",
    13975                                                 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
    13976                                                 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
    13977                                                                    szDisBuf, sizeof(szDisBuf)));
    13978                                 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
    13979                                 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
    13980                                               : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
    13981                                               :                                                      DISCPUMODE_64BIT;
    13982                             }
    13983 
    13984                             /* New opcode range? We need to fend off a spurious debug info entry here for cases
    13985                                where the compilation was aborted before the opcode was recorded and the actual
    13986                                instruction was translated to a threaded call.  This may happen when we run out
    13987                                of ranges, or when some complicated interrupts/FFs are found to be pending or
    13988                                similar.  So, we just deal with it here rather than in the compiler code as it
    13989                                is a lot simpler to do here. */
    13990                             if (   idxRange == UINT8_MAX
    13991                                 || idxRange >= cRanges
    13992                                 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
    13993                             {
    13994                                 idxRange += 1;
    13995                                 if (idxRange < cRanges)
    13996                                     offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
    13997                                 else
    13998                                     continue;
    13999                                 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
    14000                                 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
    14001                                          + (pTb->aRanges[idxRange].idxPhysPage == 0
    14002                                             ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
    14003                                             : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
    14004                                 pHlp->pfnPrintf(pHlp, "  Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
    14005                                                 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
    14006                                                 pTb->aRanges[idxRange].idxPhysPage);
    14007                                 GCPhysPc += offRange;
    14008                             }
    14009 
    14010                             /* Disassemble the instruction. */
    14011                             //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
    14012                             uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
    14013                             uint32_t      cbInstr    = 1;
    14014                             int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
    14015                                                                  &pTb->pabOpcodes[offOpcodes], cbInstrMax,
    14016                                                                  iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
    14017                             if (RT_SUCCESS(rc))
    14018                             {
    14019                                 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    14020                                                              DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    14021                                                              | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    14022                                                              NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    14023 
    14024                                 static unsigned const s_offMarker  = 55;
    14025                                 static char const     s_szMarker[] = " ; <--- guest";
    14026                                 if (cch < s_offMarker)
    14027                                 {
    14028                                     memset(&szDisBuf[cch], ' ', s_offMarker - cch);
    14029                                     cch = s_offMarker;
    14030                                 }
    14031                                 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
    14032                                     memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
    14033 
    14034                                 pHlp->pfnPrintf(pHlp, "  %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
    14035                             }
    14036                             else
    14037                             {
    14038                                 pHlp->pfnPrintf(pHlp, "  %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
    14039                                                 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
    14040                                 cbInstr = 1;
    14041                             }
    14042                             GCPhysPc   += cbInstr;
    14043                             offOpcodes += cbInstr;
    14044                             offRange   += cbInstr;
    14045                             continue;
    14046                         }
    14047 
    14048                         case kIemTbDbgEntryType_ThreadedCall:
    14049                             pHlp->pfnPrintf(pHlp,
    14050                                             "  Call #%u to %s (%u args) - %s\n",
    14051                                             idxThreadedCall,
    14052                                             g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
    14053                                             g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
    14054                                             pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
    14055                             idxThreadedCall++;
    14056                             continue;
    14057 
    14058                         case kIemTbDbgEntryType_GuestRegShadowing:
    14059                         {
    14060                             PCIEMTBDBGENTRY const pEntry    = &pDbgInfo->aEntries[iDbgEntry];
    14061                             const char * const    pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
    14062                             if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
    14063                                 pHlp->pfnPrintf(pHlp, "  Guest register %s != host register %s\n", pszGstReg,
    14064                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
    14065                             else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
    14066                                 pHlp->pfnPrintf(pHlp, "  Guest register %s == host register %s\n", pszGstReg,
    14067                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
    14068                             else
    14069                                 pHlp->pfnPrintf(pHlp, "  Guest register %s == host register %s (previously in %s)\n", pszGstReg,
    14070                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
    14071                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
    14072                             continue;
    14073                         }
    14074 
    14075                         case kIemTbDbgEntryType_Label:
    14076                         {
    14077                             const char *pszName    = "what_the_fudge";
    14078                             const char *pszComment = "";
    14079                             bool        fNumbered  = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
    14080                             switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
    14081                             {
    14082                                 case kIemNativeLabelType_Return:
    14083                                     pszName = "Return";
    14084                                     break;
    14085                                 case kIemNativeLabelType_ReturnBreak:
    14086                                     pszName = "ReturnBreak";
    14087                                     break;
    14088                                 case kIemNativeLabelType_ReturnWithFlags:
    14089                                     pszName = "ReturnWithFlags";
    14090                                     break;
    14091                                 case kIemNativeLabelType_NonZeroRetOrPassUp:
    14092                                     pszName = "NonZeroRetOrPassUp";
    14093                                     break;
    14094                                 case kIemNativeLabelType_RaiseGp0:
    14095                                     pszName = "RaiseGp0";
    14096                                     break;
    14097                                 case kIemNativeLabelType_RaiseNm:
    14098                                     pszName = "RaiseNm";
    14099                                     break;
    14100                                 case kIemNativeLabelType_RaiseUd:
    14101                                     pszName = "RaiseUd";
    14102                                     break;
    14103                                 case kIemNativeLabelType_ObsoleteTb:
    14104                                     pszName = "ObsoleteTb";
    14105                                     break;
    14106                                 case kIemNativeLabelType_NeedCsLimChecking:
    14107                                     pszName = "NeedCsLimChecking";
    14108                                     break;
    14109                                 case kIemNativeLabelType_CheckBranchMiss:
    14110                                     pszName = "CheckBranchMiss";
    14111                                     break;
    14112                                 case kIemNativeLabelType_If:
    14113                                     pszName = "If";
    14114                                     fNumbered = true;
    14115                                     break;
    14116                                 case kIemNativeLabelType_Else:
    14117                                     pszName = "Else";
    14118                                     fNumbered = true;
    14119                                     pszComment = "   ; regs state restored pre-if-block";
    14120                                     break;
    14121                                 case kIemNativeLabelType_Endif:
    14122                                     pszName = "Endif";
    14123                                     fNumbered = true;
    14124                                     break;
    14125                                 case kIemNativeLabelType_CheckIrq:
    14126                                     pszName = "CheckIrq_CheckVM";
    14127                                     fNumbered = true;
    14128                                     break;
    14129                                 case kIemNativeLabelType_TlbLookup:
    14130                                     pszName = "TlbLookup";
    14131                                     fNumbered = true;
    14132                                     break;
    14133                                 case kIemNativeLabelType_TlbMiss:
    14134                                     pszName = "TlbMiss";
    14135                                     fNumbered = true;
    14136                                     break;
    14137                                 case kIemNativeLabelType_TlbDone:
    14138                                     pszName = "TlbDone";
    14139                                     fNumbered = true;
    14140                                     break;
    14141                                 case kIemNativeLabelType_Invalid:
    14142                                 case kIemNativeLabelType_End:
    14143                                     break;
    14144                             }
    14145                             if (fNumbered)
    14146                                 pHlp->pfnPrintf(pHlp, "  %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
    14147                             else
    14148                                 pHlp->pfnPrintf(pHlp, "  %s:\n", pszName);
    14149                             continue;
    14150                         }
    14151 
    14152                         case kIemTbDbgEntryType_NativeOffset:
    14153                             offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
    14154                             Assert(offDbgNativeNext > offNative);
    14155                             break;
    14156 
    14157                         default:
    14158                             AssertFailed();
    14159                     }
    14160                     iDbgEntry++;
    14161                     break;
    14162                 }
    14163             }
    14164 
    14165             /*
    14166              * Disassemble the next native instruction.
    14167              */
    14168             PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
    14169 # ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    14170             uint32_t               cbInstr    = sizeof(paNative[0]);
    14171             int const              rc         = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
    14172             if (RT_SUCCESS(rc))
    14173             {
    14174 #  if defined(RT_ARCH_AMD64)
    14175                 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
    14176                 {
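                    /* Assumption, based on the ab[3] access below: the 7 byte NOP
                       emitted by iemNativeEmitMarker is the long 'nop dword [rax+disp32]'
                       form, so the 32-bit marker payload sits in the disp32 bytes 3..6. */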
    14177                     uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
    14178                     if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
    14179                         pHlp->pfnPrintf(pHlp, "    %p: nop ; marker: call #%u to %s (%u args) - %s\n",
    14180                                         pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
    14181                                         g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
    14182                                         uInfo & 0x8000 ? "recompiled" : "todo");
    14183                     else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
    14184                         pHlp->pfnPrintf(pHlp, "    %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
    14185                     else
    14186                         pHlp->pfnPrintf(pHlp, "    %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
    14187                 }
    14188                 else
    14189 #  endif
    14190                 {
    14191                     const char *pszAnnotation = NULL;
    14192 #  ifdef RT_ARCH_AMD64
    14193                     DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    14194                                     DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    14195                                     | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    14196                                     NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    14197                     PCDISOPPARAM pMemOp;
    14198                     if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
    14199                         pMemOp = &Dis.Param1;
    14200                     else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
    14201                         pMemOp = &Dis.Param2;
    14202                     else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
    14203                         pMemOp = &Dis.Param3;
    14204                     else
    14205                         pMemOp = NULL;
    14206                     if (   pMemOp
    14207                         && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
    14208                         && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
    14209                         pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
    14210                                                                      ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
    14211 
    14212 #  elif defined(RT_ARCH_ARM64)
    14213                     DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
    14214                                      DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    14215                                      NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    14216 #  else
    14217 #   error "Port me"
    14218 #  endif
    14219                     if (pszAnnotation)
    14220                     {
    14221                         static unsigned const s_offAnnotation = 55;
    14222                         size_t const          cchAnnotation   = strlen(pszAnnotation);
    14223                         size_t                cchDis          = strlen(szDisBuf);
    14224                         if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
    14225                         {
    14226                             if (cchDis < s_offAnnotation)
    14227                             {
    14228                                 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
    14229                                 cchDis = s_offAnnotation;
    14230                             }
    14231                             szDisBuf[cchDis++] = ' ';
    14232                             szDisBuf[cchDis++] = ';';
    14233                             szDisBuf[cchDis++] = ' ';
    14234                             memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
    14235                         }
    14236                     }
    14237                     pHlp->pfnPrintf(pHlp, "    %p: %s\n", pNativeCur, szDisBuf);
    14238                 }
    14239             }
    14240             else
    14241             {
    14242 #  if defined(RT_ARCH_AMD64)
    14243                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %Rrc\n",
    14244                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
    14245 #  elif defined(RT_ARCH_ARM64)
    14246                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
    14247 #  else
    14248 #   error "Port me"
    14249 #  endif
    14250                 cbInstr = sizeof(paNative[0]);
    14251             }
    14252             offNative += cbInstr / sizeof(paNative[0]);
    14253 
    14254 # else  /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    14255             cs_insn *pInstr;
    14256             size_t   cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
    14257                                          (uintptr_t)pNativeCur, 1, &pInstr);
    14258             if (cInstrs > 0)
    14259             {
    14260                 Assert(cInstrs == 1);
    14261 #  if defined(RT_ARCH_AMD64)
    14262                 pHlp->pfnPrintf(pHlp, "    %p: %.*Rhxs %-7s %s\n",
    14263                                 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
    14264 #  else
    14265                 pHlp->pfnPrintf(pHlp, "    %p: %#010RX32 %-7s %s\n",
    14266                                 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
    14267 #  endif
    14268                 offNative += pInstr->size / sizeof(*pNativeCur);
    14269                 cs_free(pInstr, cInstrs);
    14270             }
    14271             else
    14272             {
    14273 #  if defined(RT_ARCH_AMD64)
    14274                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %d\n",
    14275                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
    14276 #  else
    14277                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
    14278 #  endif
    14279                 offNative++;
    14280             }
    14281 # endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    14282         }
    14283     }
    14284     else
    14285 #endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
    14286     {
    14287         /*
    14288          * No debug info, just disassemble the x86 code and then the native code.
    14289          *
    14290          * First the guest code:
    14291          */
    14292         for (unsigned i = 0; i < pTb->cRanges; i++)
    14293         {
    14294             RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
    14295                               + (pTb->aRanges[i].idxPhysPage == 0
    14296                                  ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
    14297                                  : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
    14298             pHlp->pfnPrintf(pHlp, "  Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
    14299                             i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
    14300             unsigned       off       = pTb->aRanges[i].offOpcodes;
    14301             /** @todo this ain't working when crossing pages!   */
    14302             unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
    14303             while (off < cbOpcodes)
    14304             {
    14305                 uint32_t cbInstr = 1;
    14306                 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
    14307                                                      &pTb->pabOpcodes[off], cbOpcodes - off,
    14308                                                      iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
    14309                 if (RT_SUCCESS(rc))
    14310                 {
    14311                     DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    14312                                     DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    14313                                     | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    14314                                     NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    14315                     pHlp->pfnPrintf(pHlp, "    %RGp: %s\n", GCPhysPc, szDisBuf);
    14316                     GCPhysPc += cbInstr;
    14317                     off      += cbInstr;
    14318                 }
    14319                 else
    14320                 {
    14321                     pHlp->pfnPrintf(pHlp, "    %RGp: %.*Rhxs - disassembly failure %Rrc\n",
    14322                                     GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
    14323                     break;
    14324                 }
    14325             }
    14326         }
    14327 
    14328         /*
    14329          * Then the native code:
    14330          */
    14331         pHlp->pfnPrintf(pHlp, "  Native code %p L %#x\n", paNative, cNative);
    14332         while (offNative < cNative)
    14333         {
    14334             PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
    14335 # ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    14336             uint32_t               cbInstr    = sizeof(paNative[0]);
    14337             int const              rc         = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
    14338             if (RT_SUCCESS(rc))
    14339             {
    14340 #  if defined(RT_ARCH_AMD64)
    14341                 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
    14342                 {
    14343                     uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
    14344                     if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
    14345                         pHlp->pfnPrintf(pHlp, "\n    %p: nop ; marker: call #%u to %s (%u args) - %s\n",
    14346                                         pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
    14347                                         g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
    14348                                         uInfo & 0x8000 ? "recompiled" : "todo");
    14349                     else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
    14350                         pHlp->pfnPrintf(pHlp, "    %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
    14351                     else
    14352                         pHlp->pfnPrintf(pHlp, "    %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
    14353                 }
    14354                 else
    14355 #  endif
    14356                 {
    14357 #  ifdef RT_ARCH_AMD64
    14358                     DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    14359                                     DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    14360                                     | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    14361                                     NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    14362 #  elif defined(RT_ARCH_ARM64)
    14363                     DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
    14364                                      DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    14365                                      NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    14366 #  else
    14367 #   error "Port me"
    14368 #  endif
    14369                     pHlp->pfnPrintf(pHlp, "    %p: %s\n", pNativeCur, szDisBuf);
    14370                 }
    14371             }
    14372             else
    14373             {
    14374 #  if defined(RT_ARCH_AMD64)
    14375                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %Rrc\n",
    14376                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
    14377 #  else
    14378                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
    14379 #  endif
    14380                 cbInstr = sizeof(paNative[0]);
    14381             }
    14382             offNative += cbInstr / sizeof(paNative[0]);
    14383 
    14384 # else  /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    14385             cs_insn *pInstr;
    14386             size_t   cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
    14387                                          (uintptr_t)pNativeCur, 1, &pInstr);
    14388             if (cInstrs > 0)
    14389             {
    14390                 Assert(cInstrs == 1);
    14391 #  if defined(RT_ARCH_AMD64)
    14392                 pHlp->pfnPrintf(pHlp, "    %p: %.*Rhxs %-7s %s\n",
    14393                                 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
    14394 #  else
    14395                 pHlp->pfnPrintf(pHlp, "    %p: %#010RX32 %-7s %s\n",
    14396                                 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
    14397 #  endif
    14398                 offNative += pInstr->size / sizeof(*pNativeCur);
    14399                 cs_free(pInstr, cInstrs);
    14400             }
    14401             else
    14402             {
    14403 #  if defined(RT_ARCH_AMD64)
    14404                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %d\n",
    14405                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
    14406 #  else
    14407                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
    14408 #  endif
    14409                 offNative++;
    14410             }
    14411 # endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    14412         }
    14413     }
    14414 
    14415 #ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    14416     /* Cleanup. */
    14417     cs_close(&hDisasm);
    14418 #endif
    14419 }
    14420 
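The debug-info branch above is essentially a two-cursor merge: the native instruction stream advances on one side while the offset-sorted debug entries advance on the other, and an entry is processed once its recorded native offset has been reached. A compact, self-contained sketch of that pattern (ANNOT and the printf output are illustrative only):

    #include <stdio.h>

    /* Hedged sketch: interleave annotations, sorted by native offset, with the
       instruction stream, as the offDbgNativeNext/offNative dance above does. */
    typedef struct ANNOT { unsigned offNative; const char *pszText; } ANNOT;

    static void dumpWithAnnotations(unsigned cInstrs, const ANNOT *paAnnot, unsigned cAnnot)
    {
        unsigned iAnnot = 0;
        for (unsigned offNative = 0; offNative < cInstrs; offNative++)
        {
            /* Emit every annotation due at or before the current instruction. */
            while (iAnnot < cAnnot && paAnnot[iAnnot].offNative <= offNative)
                printf("  ; %s\n", paAnnot[iAnnot++].pszText);
            printf("  instr #%u\n", offNative);
        }
    }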
    14421 
    14422 /**
    14423  * Recompiles the given threaded TB into a native one.
    14424  *
    14425  * In case of failure the translation block will be returned as-is.
    14426  *
    14427  * @returns pTb.
    14428  * @param   pVCpu   The cross context virtual CPU structure of the calling
    14429  *                  thread.
    14430  * @param   pTb     The threaded translation to recompile to native.
    14431  */
    14432 DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
    14433 {
    14434     STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
    14435 
    14436     /*
    14437      * The first time thru, we allocate the recompiler state, the other times
    14438      * we just need to reset it before using it again.
    14439      */
    14440     PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    14441     if (RT_LIKELY(pReNative))
    14442         iemNativeReInit(pReNative, pTb);
    14443     else
    14444     {
    14445         pReNative = iemNativeInit(pVCpu, pTb);
    14446         AssertReturn(pReNative, pTb);
    14447     }
    14448 
    14449 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    14450     /*
    14451      * First do liveness analysis.  This is done backwards.
    14452      */
    14453     {
    14454         uint32_t idxCall = pTb->Thrd.cCalls;
    14455         if (idxCall <= pReNative->cLivenessEntriesAlloc)
    14456         { /* likely */ }
    14457         else
    14458         {
    14459             uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
    14460             while (idxCall > cAlloc)
    14461                 cAlloc *= 2;
    14462             void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
    14463             AssertReturn(pvNew, pTb);
    14464             pReNative->paLivenessEntries     = (PIEMLIVENESSENTRY)pvNew;
    14465             pReNative->cLivenessEntriesAlloc = cAlloc;
    14466         }
    14467         AssertReturn(idxCall > 0, pTb);
    14468         PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
    14469 
    14470         /* The initial (final) entry. */
    14471         idxCall--;
    14472         IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
    14473 
    14474         /* Loop backwards thru the calls and fill in the other entries. */
    14475         PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
    14476         while (idxCall > 0)
    14477         {
    14478             PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
    14479             if (pfnLiveness)
    14480                 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
    14481             else
    14482                 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
    14483             pCallEntry--;
    14484             idxCall--;
    14485         }
    14486 
    14487 # ifdef VBOX_WITH_STATISTICS
    14488         /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
    14489            to 'clobbered' rather than 'input'.  */
    14490         /** @todo */
    14491 # endif
    14492     }
    14493 #endif
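    /* Hedged restatement of the pass above: entry cCalls-1 is seeded all-unused
       and each iteration derives entry idxCall-1 from entry idxCall, conceptually
           live[i-1] = reads(call[i]) | (live[i] & ~writes(call[i])),
       with pfnLiveness (or the XCPT_OR_CALL fallback) supplying the real combine.
       Thus when a call is recompiled in the forward pass below, its liveness
       entry already reflects everything the later calls will consume. */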
    14494 
    14495     /*
    14496      * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
    14497      * for aborting if an error happens.
    14498      */
    14499     uint32_t        cCallsLeft = pTb->Thrd.cCalls;
    14500 #ifdef LOG_ENABLED
    14501     uint32_t const  cCallsOrg  = cCallsLeft;
    14502 #endif
    14503     uint32_t        off        = 0;
    14504     int             rc         = VINF_SUCCESS;
    14505     IEMNATIVE_TRY_SETJMP(pReNative, rc)
    14506     {
    14507         /*
    14508          * Emit prolog code (fixed).
    14509          */
    14510         off = iemNativeEmitProlog(pReNative, off);
    14511 
    14512         /*
    14513          * Convert the calls to native code.
    14514          */
    14515 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    14516         int32_t              iGstInstr        = -1;
    14517 #endif
    14518 #ifndef VBOX_WITHOUT_RELEASE_STATISTICS
    14519         uint32_t             cThreadedCalls   = 0;
    14520         uint32_t             cRecompiledCalls = 0;
    14521 #endif
    14522 #if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
    14523         uint32_t             idxCurCall       = 0;
    14524 #endif
    14525         PCIEMTHRDEDCALLENTRY pCallEntry       = pTb->Thrd.paCalls;
    14526         pReNative->fExec                      = pTb->fFlags & IEMTB_F_IEM_F_MASK;
    14527         while (cCallsLeft-- > 0)
    14528         {
    14529             PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
    14530 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    14531             pReNative->idxCurCall                 = idxCurCall;
    14532 #endif
    14533 
    14534             /*
    14535              * Debug info, assembly markup and statistics.
    14536              */
    14537 #if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
    14538             if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
    14539                 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
    14540 #endif
    14541 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    14542             iemNativeDbgInfoAddNativeOffset(pReNative, off);
    14543             if (iGstInstr < (int32_t)pCallEntry->idxInstr)
    14544             {
    14545                 if (iGstInstr < (int32_t)pTb->cInstructions)
    14546                     iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
    14547                 else
    14548                     Assert(iGstInstr == pTb->cInstructions);
    14549                 iGstInstr = pCallEntry->idxInstr;
    14550             }
    14551             iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
    14552 #endif
    14553 #if defined(VBOX_STRICT)
    14554             off = iemNativeEmitMarker(pReNative, off,
    14555                                       RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
    14556 #endif
    14557 #if defined(VBOX_STRICT)
    14558             iemNativeRegAssertSanity(pReNative);
    14559 #endif
    14560 #ifdef VBOX_WITH_STATISTICS
    14561             off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
    14562 #endif
    14563 
    14564             /*
    14565              * Actual work.
    14566              */
    14567             Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
    14568                   pfnRecom ? "(recompiled)" : "(todo)"));
    14569             if (pfnRecom) /** @todo stats on this.   */
    14570             {
    14571                 off = pfnRecom(pReNative, off, pCallEntry);
    14572                 STAM_REL_STATS({cRecompiledCalls++;});
    14573             }
    14574             else
    14575             {
    14576                 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
    14577                 STAM_REL_STATS({cThreadedCalls++;});
    14578             }
    14579             Assert(off <= pReNative->cInstrBufAlloc);
    14580             Assert(pReNative->cCondDepth == 0);
    14581 
    14582 #if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
    14583             if (LogIs2Enabled())
    14584             {
    14585                 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
    14586 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    14587                 static const char s_achState[] = "CUXI";
    14588 # else
    14589                 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
    14590 # endif
    14591 
    14592                 char szGpr[17];
    14593                 for (unsigned i = 0; i < 16; i++)
    14594                     szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
    14595                 szGpr[16] = '\0';
    14596 
    14597                 char szSegBase[X86_SREG_COUNT + 1];
    14598                 char szSegLimit[X86_SREG_COUNT + 1];
    14599                 char szSegAttrib[X86_SREG_COUNT + 1];
    14600                 char szSegSel[X86_SREG_COUNT + 1];
    14601                 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
    14602                 {
    14603                     szSegBase[i]   = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
    14604                     szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
    14605                     szSegLimit[i]  = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
    14606                     szSegSel[i]    = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
    14607                 }
    14608                 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
    14609                     = szSegSel[X86_SREG_COUNT] = '\0';
    14610 
    14611                 char szEFlags[8];
    14612                 for (unsigned i = 0; i < 7; i++)
    14613                     szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
    14614                 szEFlags[7] = '\0';
    14615 
    14616                 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
    14617                       szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
    14618             }
    14619 #endif
    14620 
    14621             /*
    14622              * Advance.
    14623              */
    14624             pCallEntry++;
    14625 #if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
    14626             idxCurCall++;
    14627 #endif
    14628         }
    14629 
    14630         STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
    14631         STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded,   cThreadedCalls);
    14632         if (!cThreadedCalls)
    14633             STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
    14634 
    14635         /*
    14636          * Emit the epilog code.
    14637          */
    14638         uint32_t idxReturnLabel;
    14639         off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
    14640 
    14641         /*
    14642          * Generate special jump labels.
    14643          */
    14644         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
    14645             off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
    14646         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
    14647             off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
    14648         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
    14649             off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
    14650         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
    14651             off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
    14652         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
    14653             off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
    14654         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
    14655             off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
    14656         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
    14657             off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
    14658         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
    14659             off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
    14660     }
    14661     IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
    14662     {
    14663         Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
    14664         return pTb;
    14665     }
    14666     IEMNATIVE_CATCH_LONGJMP_END(pReNative);
    14667     Assert(off <= pReNative->cInstrBufAlloc);
    14668 
    14669     /*
    14670      * Make sure all labels have been defined.
    14671      */
    14672     PIEMNATIVELABEL const paLabels = pReNative->paLabels;
    14673 #ifdef VBOX_STRICT
    14674     uint32_t const        cLabels  = pReNative->cLabels;
    14675     for (uint32_t i = 0; i < cLabels; i++)
    14676         AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
    14677 #endif
    14678 
    14679     /*
    14680      * Allocate executable memory, copy over the code we've generated.
    14681      */
    14682     PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
    14683     if (pTbAllocator->pDelayedFreeHead)
    14684         iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
    14685 
    14686     PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
    14687     AssertReturn(paFinalInstrBuf, pTb);
    14688     memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
    14689 
    14690     /*
    14691      * Apply fixups.
    14692      */
    14693     PIEMNATIVEFIXUP const paFixups   = pReNative->paFixups;
    14694     uint32_t const        cFixups    = pReNative->cFixups;
    14695     for (uint32_t i = 0; i < cFixups; i++)
    14696     {
    14697         Assert(paFixups[i].off < off);
    14698         Assert(paFixups[i].idxLabel < cLabels);
    14699         AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
    14700                   ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
    14701                    paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
    14702         RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
    14703         switch (paFixups[i].enmType)
    14704         {
    14705 #if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    14706             case kIemNativeFixupType_Rel32:
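                /* Note: rel32 operands are relative to the end of the instruction;
                   the recorded offAddend is assumed to compensate for that (and for
                   the 4 byte field itself), so no extra adjustment happens here. */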
    14707                 Assert(paFixups[i].off + 4 <= off);
    14708                 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    14709                 continue;
    14710 
    14711 #elif defined(RT_ARCH_ARM64)
    14712             case kIemNativeFixupType_RelImm26At0:
    14713             {
    14714                 Assert(paFixups[i].off < off);
    14715                 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    14716                 Assert(offDisp >= -33554432 && offDisp < 33554432); /* +/-2^25 instructions, the full imm26 range */
    14717                 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
    14718                 continue;
    14719             }
    14720 
    14721             case kIemNativeFixupType_RelImm19At5:
    14722             {
    14723                 Assert(paFixups[i].off < off);
    14724                 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    14725                 Assert(offDisp >= -262144 && offDisp < 262144);
    14726                 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
    14727                 continue;
    14728             }
    14729 
    14730             case kIemNativeFixupType_RelImm14At5:
    14731             {
    14732                 Assert(paFixups[i].off < off);
    14733                 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    14734                 Assert(offDisp >= -8192 && offDisp < 8192);
    14735                 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
    14736                 continue;
    14737             }
    14738 
    14739 #endif
    14740             case kIemNativeFixupType_Invalid:
    14741             case kIemNativeFixupType_End:
    14742                 break;
    14743         }
    14744         AssertFailed();
    14745     }
    14746 
    14747     iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
    14748     STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
    14749 
    14750     /*
    14751      * Convert the translation block.
    14752      */
    14753     RTMemFree(pTb->Thrd.paCalls);
    14754     pTb->Native.paInstructions  = paFinalInstrBuf;
    14755     pTb->Native.cInstructions   = off;
    14756     pTb->fFlags                 = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
    14757 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    14758     pTb->pDbgInfo               = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
    14759                                                       RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
    14760 #endif
    14761 
    14762     Assert(pTbAllocator->cThreadedTbs > 0);
    14763     pTbAllocator->cThreadedTbs -= 1;
    14764     pTbAllocator->cNativeTbs   += 1;
    14765     Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
    14766 
    14767 #ifdef LOG_ENABLED
    14768     /*
    14769      * Disassemble to the log if enabled.
    14770      */
    14771     if (LogIs3Enabled())
    14772     {
    14773         Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
    14774         iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
    14775 # ifdef DEBUG_bird
    14776         RTLogFlush(NULL);
    14777 # endif
    14778     }
    14779 #endif
    14780     /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
    14781 
    14782     STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
    14783     return pTb;
    14784 }
    14785 
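The AArch64 fixup cases in the switch above all follow the same splice pattern: mask out the immediate field, then OR in the truncated displacement at its bit position. A self-contained sketch of the imm19 variant (masks taken from the RelImm19At5 case; the function name is illustrative):

    #include <stdint.h>

    /* Hedged sketch: splice a signed 19-bit displacement (in instruction units)
       into bits [23:5] of an AArch64 conditional branch, as the
       kIemNativeFixupType_RelImm19At5 case above does.  The caller must ensure
       -262144 <= offDisp < 262144, matching the assertion in that case. */
    static uint32_t patchRelImm19At5(uint32_t uInstr, int32_t offDisp)
    {
        return (uInstr & UINT32_C(0xff00001f))
             | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
    }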
     186#endif /* !VMM_INCLUDED_SRC_VMMAll_target_x86_IEMAllN8veEmit_x86_h */