VirtualBox

Changeset 104115 in vbox for trunk/src/VBox


Timestamp:
Mar 29, 2024 2:11:56 AM
Author:
vboxsync
svn:sync-xref-src-repo-rev:
162522
Message:

VMM/IEM: Split out the executable memory allocator from IEMAllN8veRecompiler.cpp and into a separate file, IEMAllN8veExecMem.cpp. bugref:10370
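
Note: the allocator entry points that now cross the new file boundary lose their static storage class and become DECLHIDDEN with RT_NOEXCEPT (see the hunks below). A minimal sketch of the resulting cross-translation-unit interface, with the signatures copied from the hunks; the assumption that the declarations live in the shared internal header IEMN8veRecompiler.h (which the new file includes) is ours, the header itself is not part of this changeset:

    /* Sketch of the interface implied by the split; header placement assumed. */
    DECLHIDDEN(void *) iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb) RT_NOEXCEPT;
    DECLHIDDEN(void)   iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) RT_NOEXCEPT;
    DECLHIDDEN(void)   iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) RT_NOEXCEPT;
    int                iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk) RT_NOEXCEPT;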

Location:
trunk/src/VBox/VMM
Files:
3 edited
1 copied

  • trunk/src/VBox/VMM/Makefile.kmk

    r103808 r104115  
    262262 ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
    263263  VBoxVMM_SOURCES += \
     264        VMMAll/IEMAllN8veExecMem.cpp \
    264265        VMMAll/IEMAllN8veRecompiler.cpp \
    265266        VMMAll/IEMAllN8veRecompFuncs1.cpp \
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veExecMem.cpp

    r104114 r104115  
    4747#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
    4848#define IEM_WITH_OPAQUE_DECODER_STATE
    49 #define VMCPU_INCL_CPUM_GST_CTX
    5049#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
    5150#include <VBox/vmm/iem.h>
    5251#include <VBox/vmm/cpum.h>
    53 #include <VBox/vmm/dbgf.h>
    5452#include "IEMInternal.h"
    5553#include <VBox/vmm/vmcc.h>
    5654#include <VBox/log.h>
    5755#include <VBox/err.h>
    58 #include <VBox/dis.h>
    5956#include <VBox/param.h>
    6057#include <iprt/assert.h>
     
    9188# endif
    9289#endif
    93 #ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    94 # include "/opt/local/include/capstone/capstone.h"
    95 #endif
    96 
    97 #include "IEMInline.h"
    98 #include "IEMThreadedFunctions.h"
     90
    9991#include "IEMN8veRecompiler.h"
    100 #include "IEMN8veRecompilerEmit.h"
    101 #include "IEMN8veRecompilerTlbLookup.h"
    102 #include "IEMNativeFunctions.h"
    103 
    104 
    105 /*
    106  * Narrow down configs here to avoid wasting time on unused configs.
    107  * Note! Same checks in IEMAllThrdRecompiler.cpp.
    108  */
    109 
    110 #ifndef IEM_WITH_CODE_TLB
    111 # error The code TLB must be enabled for the recompiler.
    112 #endif
    113 
    114 #ifndef IEM_WITH_DATA_TLB
    115 # error The data TLB must be enabled for the recompiler.
    116 #endif
    117 
    118 #ifndef IEM_WITH_SETJMP
    119 # error The setjmp approach must be enabled for the recompiler.
    120 #endif
    121 
    122 /** @todo eliminate this clang build hack. */
    123 #if RT_CLANG_PREREQ(4, 0)
    124 # pragma GCC diagnostic ignored "-Wunused-function"
    125 #endif
    126 
    127 
    128 /*********************************************************************************************************************************
    129 *   Internal Functions                                                                                                           *
    130 *********************************************************************************************************************************/
    131 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    132 static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
    133 #endif
    134 DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
    135 DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
    136                                                             IEMNATIVEGSTREG enmGstReg, uint32_t off);
    137 DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
    13892
    13993
     
    624578 * @param   pTb     The translation block that will be using the allocation.
    625579 */
    626 static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb)
     580DECLHIDDEN(void *) iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb) RT_NOEXCEPT
    627581{
    628582    PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
     
    700654/** This is a hook that we may need later for changing memory protection back
    701655 *  to readonly+exec */
    702 static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
     656DECLHIDDEN(void) iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) RT_NOEXCEPT
    703657{
    704658#ifdef RT_OS_DARWIN
     
    722676 * Frees executable memory.
    723677 */
    724 void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
     678DECLHIDDEN(void) iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) RT_NOEXCEPT
    725679{
    726680    PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
     
    14841438 *                      dependent).
    14851439 */
    1486 int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
     1440int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk) RT_NOEXCEPT
    14871441{
    14881442    /*
     
    16121566}
    16131567
    1614 
    1615 /*********************************************************************************************************************************
    1616 *   Native Recompilation                                                                                                         *
    1617 *********************************************************************************************************************************/
    1618 
    1619 
    1620 /**
    1621  * Used by TB code when encountering a non-zero status or rcPassUp after a call.
    1622  */
    1623 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
    1624 {
    1625     pVCpu->iem.s.cInstructions += idxInstr;
    1626     return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
    1627 }
    1628 
    1629 
    1630 /**
    1631  * Used by TB code when it wants to raise a \#DE.
    1632  */
    1633 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
    1634 {
    1635     iemRaiseDivideErrorJmp(pVCpu);
    1636 #ifndef _MSC_VER
    1637     return VINF_IEM_RAISED_XCPT; /* not reached */
    1638 #endif
    1639 }
    1640 
    1641 
    1642 /**
    1643  * Used by TB code when it wants to raise a \#UD.
    1644  */
    1645 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
    1646 {
    1647     iemRaiseUndefinedOpcodeJmp(pVCpu);
    1648 #ifndef _MSC_VER
    1649     return VINF_IEM_RAISED_XCPT; /* not reached */
    1650 #endif
    1651 }
    1652 
    1653 
    1654 /**
    1655  * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
    1656  *
    1657  * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
    1658  */
    1659 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
    1660 {
    1661     if (   (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
    1662         || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
    1663         iemRaiseUndefinedOpcodeJmp(pVCpu);
    1664     else
    1665         iemRaiseDeviceNotAvailableJmp(pVCpu);
    1666 #ifndef _MSC_VER
    1667     return VINF_IEM_RAISED_XCPT; /* not reached */
    1668 #endif
    1669 }
    1670 
    1671 
    1672 /**
    1673  * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
    1674  *
    1675  * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
    1676  */
    1677 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
    1678 {
    1679     if (   (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
    1680         || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
    1681         iemRaiseUndefinedOpcodeJmp(pVCpu);
    1682     else
    1683         iemRaiseDeviceNotAvailableJmp(pVCpu);
    1684 #ifndef _MSC_VER
    1685     return VINF_IEM_RAISED_XCPT; /* not reached */
    1686 #endif
    1687 }
    1688 
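
Note: the AVX gate above requires both the SSE and YMM bits to be set in XCR0 at once, as a single masked compare rather than two separate tests. A standalone illustration of that check (bit values per the x86 XSAVE layout: SSE is bit 1, YMM is bit 2):

    #include <stdint.h>
    #include <stdio.h>

    #define XSAVE_C_SSE UINT64_C(0x02) /* XCR0 bit 1 */
    #define XSAVE_C_YMM UINT64_C(0x04) /* XCR0 bit 2 */

    int main(void)
    {
        uint64_t const fXcr0 = XSAVE_C_SSE; /* YMM state not enabled by the OS */
        /* #UD unless *both* bits are set; one set bit is not enough. */
        int const fRaiseUd = (fXcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE);
        printf("raise #UD: %d\n", fRaiseUd); /* prints: raise #UD: 1 */
        return 0;
    }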
    1689 
    1690 /**
    1691  * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
    1692  *
    1693  * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
    1694  */
    1695 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
    1696 {
    1697     if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
    1698         iemRaiseSimdFpExceptionJmp(pVCpu);
    1699     else
    1700         iemRaiseUndefinedOpcodeJmp(pVCpu);
    1701 #ifndef _MSC_VER
    1702     return VINF_IEM_RAISED_XCPT; /* not reached */
    1703 #endif
    1704 }
    1705 
    1706 
    1707 /**
    1708  * Used by TB code when it wants to raise a \#NM.
    1709  */
    1710 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
    1711 {
    1712     iemRaiseDeviceNotAvailableJmp(pVCpu);
    1713 #ifndef _MSC_VER
    1714     return VINF_IEM_RAISED_XCPT; /* not reached */
    1715 #endif
    1716 }
    1717 
    1718 
    1719 /**
    1720  * Used by TB code when it wants to raise a \#GP(0).
    1721  */
    1722 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
    1723 {
    1724     iemRaiseGeneralProtectionFault0Jmp(pVCpu);
    1725 #ifndef _MSC_VER
    1726     return VINF_IEM_RAISED_XCPT; /* not reached */
    1727 #endif
    1728 }
    1729 
    1730 
    1731 /**
    1732  * Used by TB code when it wants to raise a \#MF.
    1733  */
    1734 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
    1735 {
    1736     iemRaiseMathFaultJmp(pVCpu);
    1737 #ifndef _MSC_VER
    1738     return VINF_IEM_RAISED_XCPT; /* not reached */
    1739 #endif
    1740 }
    1741 
    1742 
    1743 /**
    1744  * Used by TB code when it wants to raise a \#XF.
    1745  */
    1746 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
    1747 {
    1748     iemRaiseSimdFpExceptionJmp(pVCpu);
    1749 #ifndef _MSC_VER
    1750     return VINF_IEM_RAISED_XCPT; /* not reached */
    1751 #endif
    1752 }
    1753 
    1754 
    1755 /**
    1756  * Used by TB code when detecting opcode changes.
    1757  * @see iemThreadedFuncWorkerObsoleteTb
    1758  */
    1759 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
    1760 {
    1761     /* We set fSafeToFree to false because we're being called in the context
    1762        of a TB callback function, which for native TBs means we cannot release
    1763        the executable memory until we've returned all the way back to iemTbExec,
    1764        as that return path goes via the native code generated for the TB. */
    1765     Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
    1766     iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
    1767     return VINF_IEM_REEXEC_BREAK;
    1768 }
    1769 
    1770 
    1771 /**
    1772  * Used by TB code when we need to switch to a TB with CS.LIM checking.
    1773  */
    1774 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
    1775 {
    1776     Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
    1777           pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
    1778           (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
    1779           pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
    1780     STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
    1781     return VINF_IEM_REEXEC_BREAK;
    1782 }
    1783 
    1784 
    1785 /**
    1786  * Used by TB code when we missed a PC check after a branch.
    1787  */
    1788 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
    1789 {
    1790     Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
    1791           pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
    1792           pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
    1793           pVCpu->iem.s.pbInstrBuf));
    1794     STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
    1795     return VINF_IEM_REEXEC_BREAK;
    1796 }
    1797 
    1798 
    1799 
    1800 /*********************************************************************************************************************************
    1801 *   Helpers: Segmented memory fetches and stores.                                                                                *
    1802 *********************************************************************************************************************************/
    1803 
    1804 /**
    1805  * Used by TB code to load unsigned 8-bit data w/ segmentation.
    1806  */
    1807 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1808 {
    1809 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1810     return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1811 #else
    1812     return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1813 #endif
    1814 }
    1815 
    1816 
    1817 /**
    1818  * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
    1819  * to 16 bits.
    1820  */
    1821 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1822 {
    1823 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1824     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1825 #else
    1826     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1827 #endif
    1828 }
    1829 
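
Note: the cast chains in these sign-extending helpers read right to left: reinterpret the fetched byte as signed, sign-extend to the target width, then zero-extend to 64 bits so the upper register bits are deterministic. A standalone check of the U8 to U16 case:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t const bVal = 0x80; /* -128 when viewed as int8_t */
        /* Same chain iemNativeHlpMemFetchDataU8_Sx_U16 applies to the fetched
           byte: sign-extend 8->16 bits, then zero-extend 16->64 bits. */
        uint64_t const uRet = (uint64_t)(uint16_t)(int16_t)(int8_t)bVal;
        printf("%#llx\n", (unsigned long long)uRet); /* prints 0xff80 */
        return 0;
    }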
    1830 
    1831 /**
    1832  * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
    1833  * to 32 bits.
    1834  */
    1835 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1836 {
    1837 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1838     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1839 #else
    1840     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1841 #endif
    1842 }
    1843 
    1844 /**
    1845  * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
    1846  * to 64 bits.
    1847  */
    1848 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1849 {
    1850 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1851     return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1852 #else
    1853     return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1854 #endif
    1855 }
    1856 
    1857 
    1858 /**
    1859  * Used by TB code to load unsigned 16-bit data w/ segmentation.
    1860  */
    1861 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1862 {
    1863 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1864     return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1865 #else
    1866     return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
    1867 #endif
    1868 }
    1869 
    1870 
    1871 /**
    1872  * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
    1873  * to 32 bits.
    1874  */
    1875 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1876 {
    1877 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1878     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1879 #else
    1880     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
    1881 #endif
    1882 }
    1883 
    1884 
    1885 /**
    1886  * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
    1887  * to 64 bits.
    1888  */
    1889 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1890 {
    1891 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1892     return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1893 #else
    1894     return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
    1895 #endif
    1896 }
    1897 
    1898 
    1899 /**
    1900  * Used by TB code to load unsigned 32-bit data w/ segmentation.
    1901  */
    1902 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1903 {
    1904 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1905     return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1906 #else
    1907     return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
    1908 #endif
    1909 }
    1910 
    1911 
    1912 /**
    1913  * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
    1914  * to 64 bits.
    1915  */
    1916 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1917 {
    1918 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1919     return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1920 #else
    1921     return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
    1922 #endif
    1923 }
    1924 
    1925 
    1926 /**
    1927  * Used by TB code to load unsigned 64-bit data w/ segmentation.
    1928  */
    1929 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1930 {
    1931 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1932     return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1933 #else
    1934     return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
    1935 #endif
    1936 }
    1937 
    1938 
    1939 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    1940 /**
    1941  * Used by TB code to load 128-bit data w/ segmentation.
    1942  */
    1943 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
    1944 {
    1945 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1946     iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
    1947 #else
    1948     iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
    1949 #endif
    1950 }
    1951 
    1952 
    1953 /**
    1954  * Used by TB code to load 128-bit data w/ segmentation.
    1955  */
    1956 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
    1957 {
    1958 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1959     iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
    1960 #else
    1961     iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
    1962 #endif
    1963 }
    1964 
    1965 
    1966 /**
    1967  * Used by TB code to load 128-bit data w/ segmentation.
    1968  */
    1969 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
    1970 {
    1971 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1972     iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
    1973 #else
    1974     iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
    1975 #endif
    1976 }
    1977 
    1978 
    1979 /**
    1980  * Used by TB code to load 256-bit data w/ segmentation.
    1981  */
    1982 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
    1983 {
    1984 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1985     iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
    1986 #else
    1987     iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
    1988 #endif
    1989 }
    1990 
    1991 
    1992 /**
    1993  * Used by TB code to load 256-bit data w/ segmentation.
    1994  */
    1995 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
    1996 {
    1997 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1998     iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
    1999 #else
    2000     iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
    2001 #endif
    2002 }
    2003 #endif
    2004 
    2005 
    2006 /**
    2007  * Used by TB code to store unsigned 8-bit data w/ segmentation.
    2008  */
    2009 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
    2010 {
    2011 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2012     iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
    2013 #else
    2014     iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
    2015 #endif
    2016 }
    2017 
    2018 
    2019 /**
    2020  * Used by TB code to store unsigned 16-bit data w/ segmentation.
    2021  */
    2022 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
    2023 {
    2024 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2025     iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
    2026 #else
    2027     iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
    2028 #endif
    2029 }
    2030 
    2031 
    2032 /**
    2033  * Used by TB code to store unsigned 32-bit data w/ segmentation.
    2034  */
    2035 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
    2036 {
    2037 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2038     iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
    2039 #else
    2040     iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
    2041 #endif
    2042 }
    2043 
    2044 
    2045 /**
    2046  * Used by TB code to store unsigned 64-bit data w/ segmentation.
    2047  */
    2048 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
    2049 {
    2050 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2051     iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
    2052 #else
    2053     iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
    2054 #endif
    2055 }
    2056 
    2057 
    2058 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    2059 /**
    2060  * Used by TB code to store unsigned 128-bit data w/ segmentation.
    2061  */
    2062 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
    2063 {
    2064 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2065     iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
    2066 #else
    2067     iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
    2068 #endif
    2069 }
    2070 
    2071 
    2072 /**
    2073  * Used by TB code to store unsigned 128-bit data w/ segmentation.
    2074  */
    2075 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
    2076 {
    2077 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2078     iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
    2079 #else
    2080     iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
    2081 #endif
    2082 }
    2083 
    2084 
    2085 /**
    2086  * Used by TB code to store unsigned 256-bit data w/ segmentation.
    2087  */
    2088 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
    2089 {
    2090 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2091     iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
    2092 #else
    2093     iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
    2094 #endif
    2095 }
    2096 
    2097 
    2098 /**
    2099  * Used by TB code to store unsigned 256-bit data w/ segmentation.
    2100  */
    2101 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
    2102 {
    2103 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2104     iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
    2105 #else
    2106     iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
    2107 #endif
    2108 }
    2109 #endif
    2110 
    2111 
    2112 
    2113 /**
    2114  * Used by TB code to store an unsigned 16-bit value onto a generic stack.
    2115  */
    2116 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
    2117 {
    2118 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2119     iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
    2120 #else
    2121     iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
    2122 #endif
    2123 }
    2124 
    2125 
    2126 /**
    2127  * Used by TB code to store an unsigned 32-bit value onto a generic stack.
    2128  */
    2129 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    2130 {
    2131 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2132     iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
    2133 #else
    2134     iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
    2135 #endif
    2136 }
    2137 
    2138 
    2139 /**
    2140  * Used by TB code to store a 32-bit selector value onto a generic stack.
    2141  *
    2142  * Intel CPUs don't write a whole dword, hence the special function.
    2143  */
    2144 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    2145 {
    2146 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2147     iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
    2148 #else
    2149     iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
    2150 #endif
    2151 }
    2152 
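
Note: the behavior the comment above describes is that, with a 32-bit operand size, Intel CPUs write only the low 16 bits of the pushed selector and leave the upper half of the stack slot untouched. A standalone sketch of that store pattern (illustrative only, not the VBox implementation):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        uint8_t  abSlot[4];
        uint32_t uSel = 0x0028; /* selector value being pushed */

        memset(abSlot, 0xcc, sizeof(abSlot)); /* pre-existing stack bytes */
        memcpy(abSlot, &uSel, 2);             /* only the low word is written */
        printf("%02x %02x %02x %02x\n", abSlot[0], abSlot[1], abSlot[2], abSlot[3]);
        /* little-endian: 28 00 cc cc - bytes 2 and 3 keep their old contents */
        return 0;
    }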
    2153 
    2154 /**
    2155  * Used by TB code to push an unsigned 64-bit value onto a generic stack.
    2156  */
    2157 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
    2158 {
    2159 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2160     iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
    2161 #else
    2162     iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
    2163 #endif
    2164 }
    2165 
    2166 
    2167 /**
    2168  * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
    2169  */
    2170 IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2171 {
    2172 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    2173     return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
    2174 #else
    2175     return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
    2176 #endif
    2177 }
    2178 
    2179 
    2180 /**
    2181  * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
    2182  */
    2183 IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2184 {
    2185 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    2186     return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
    2187 #else
    2188     return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
    2189 #endif
    2190 }
    2191 
    2192 
    2193 /**
    2194  * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
    2195  */
    2196 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2197 {
    2198 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    2199     return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
    2200 #else
    2201     return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
    2202 #endif
    2203 }
    2204 
    2205 
    2206 
    2207 /*********************************************************************************************************************************
    2208 *   Helpers: Flat memory fetches and stores.                                                                                     *
    2209 *********************************************************************************************************************************/
    2210 
    2211 /**
    2212  * Used by TB code to load unsigned 8-bit data w/ flat address.
    2213  * @note Zero extending the value to 64-bit to simplify assembly.
    2214  */
    2215 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2216 {
    2217 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2218     return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2219 #else
    2220     return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    2221 #endif
    2222 }
    2223 
    2224 
    2225 /**
    2226  * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
    2227  * to 16 bits.
    2228  * @note Zero extending the value to 64-bit to simplify assembly.
    2229  */
    2230 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2231 {
    2232 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2233     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2234 #else
    2235     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    2236 #endif
    2237 }
    2238 
    2239 
    2240 /**
    2241  * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
    2242  * to 32 bits.
    2243  * @note Zero extending the value to 64-bit to simplify assembly.
    2244  */
    2245 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2246 {
    2247 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2248     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2249 #else
    2250     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    2251 #endif
    2252 }
    2253 
    2254 
    2255 /**
    2256  * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
    2257  * to 64 bits.
    2258  */
    2259 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2260 {
    2261 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2262     return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2263 #else
    2264     return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    2265 #endif
    2266 }
    2267 
    2268 
    2269 /**
    2270  * Used by TB code to load unsigned 16-bit data w/ flat address.
    2271  * @note Zero extending the value to 64-bit to simplify assembly.
    2272  */
    2273 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2274 {
    2275 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2276     return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2277 #else
    2278     return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
    2279 #endif
    2280 }
    2281 
    2282 
    2283 /**
    2284  * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
    2285  * to 32 bits.
    2286  * @note Zero extending the value to 64-bit to simplify assembly.
    2287  */
    2288 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2289 {
    2290 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2291     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2292 #else
    2293     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
    2294 #endif
    2295 }
    2296 
    2297 
    2298 /**
    2299  * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
    2300  * to 64 bits.
    2301  * @note Zero extending the value to 64-bit to simplify assembly.
    2302  */
    2303 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2304 {
    2305 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2306     return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2307 #else
    2308     return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
    2309 #endif
    2310 }
    2311 
    2312 
    2313 /**
    2314  * Used by TB code to load unsigned 32-bit data w/ flat address.
    2315  * @note Zero extending the value to 64-bit to simplify assembly.
    2316  */
    2317 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2318 {
    2319 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2320     return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2321 #else
    2322     return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
    2323 #endif
    2324 }
    2325 
    2326 
    2327 /**
    2328  * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
    2329  * to 64 bits.
    2330  * @note Zero extending the value to 64-bit to simplify assembly.
    2331  */
    2332 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2333 {
    2334 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2335     return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2336 #else
    2337     return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
    2338 #endif
    2339 }
    2340 
    2341 
    2342 /**
    2343  * Used by TB code to load unsigned 64-bit data w/ flat address.
    2344  */
    2345 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2346 {
    2347 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2348     return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2349 #else
    2350     return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
    2351 #endif
    2352 }
    2353 
    2354 
    2355 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    2356 /**
    2357  * Used by TB code to load unsigned 128-bit data w/ flat address.
    2358  */
    2359 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
    2360 {
    2361 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2362     return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
    2363 #else
    2364     return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
    2365 #endif
    2366 }
    2367 
    2368 
    2369 /**
    2370  * Used by TB code to load unsigned 128-bit data w/ flat address.
    2371  */
    2372 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
    2373 {
    2374 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2375     return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
    2376 #else
    2377     return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
    2378 #endif
    2379 }
    2380 
    2381 
    2382 /**
    2383  * Used by TB code to load unsigned 128-bit data w/ flat address.
    2384  */
    2385 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
    2386 {
    2387 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2388     return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
    2389 #else
    2390     return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
    2391 #endif
    2392 }
    2393 
    2394 
    2395 /**
    2396  * Used by TB code to load unsigned 256-bit data w/ flat address.
    2397  */
    2398 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
    2399 {
    2400 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2401     return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
    2402 #else
    2403     return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
    2404 #endif
    2405 }
    2406 
    2407 
    2408 /**
    2409  * Used by TB code to load unsigned 256-bit data w/ flat address.
    2410  */
    2411 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
    2412 {
    2413 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2414     return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
    2415 #else
    2416     return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
    2417 #endif
    2418 }
    2419 #endif
    2420 
    2421 
    2422 /**
    2423  * Used by TB code to store unsigned 8-bit data w/ flat address.
    2424  */
    2425 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
    2426 {
    2427 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2428     iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
    2429 #else
    2430     iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
    2431 #endif
    2432 }
    2433 
    2434 
    2435 /**
    2436  * Used by TB code to store unsigned 16-bit data w/ flat address.
    2437  */
    2438 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
    2439 {
    2440 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2441     iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
    2442 #else
    2443     iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
    2444 #endif
    2445 }
    2446 
    2447 
    2448 /**
    2449  * Used by TB code to store unsigned 32-bit data w/ flat address.
    2450  */
    2451 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    2452 {
    2453 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2454     iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
    2455 #else
    2456     iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
    2457 #endif
    2458 }
    2459 
    2460 
    2461 /**
    2462  * Used by TB code to store unsigned 64-bit data w/ flat address.
    2463  */
    2464 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
    2465 {
    2466 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2467     iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
    2468 #else
    2469     iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
    2470 #endif
    2471 }
    2472 
    2473 
    2474 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    2475 /**
    2476  * Used by TB code to store unsigned 128-bit data w/ flat address.
    2477  */
    2478 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
    2479 {
    2480 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2481     iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
    2482 #else
    2483     iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
    2484 #endif
    2485 }
    2486 
    2487 
    2488 /**
    2489  * Used by TB code to store unsigned 128-bit data w/ flat address.
    2490  */
    2491 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
    2492 {
    2493 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2494     iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
    2495 #else
    2496     iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
    2497 #endif
    2498 }
    2499 
    2500 
    2501 /**
    2502  * Used by TB code to store unsigned 256-bit data w/ flat address.
    2503  */
    2504 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
    2505 {
    2506 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2507     iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
    2508 #else
    2509     iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
    2510 #endif
    2511 }
    2512 
    2513 
    2514 /**
    2515  * Used by TB code to store unsigned 256-bit data w/ flat address.
    2516  */
    2517 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
    2518 {
    2519 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2520     iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
    2521 #else
    2522     iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
    2523 #endif
    2524 }
    2525 #endif
    2526 
    2527 
    2528 
    2529 /**
    2530  * Used by TB code to store an unsigned 16-bit value onto a flat stack.
    2531  */
    2532 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
    2533 {
    2534 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2535     iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
    2536 #else
    2537     iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
    2538 #endif
    2539 }
    2540 
    2541 
    2542 /**
    2543  * Used by TB code to store an unsigned 32-bit value onto a flat stack.
    2544  */
    2545 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    2546 {
    2547 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2548     iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
    2549 #else
    2550     iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
    2551 #endif
    2552 }
    2553 
    2554 
    2555 /**
    2556  * Used by TB code to store a segment selector value onto a flat stack.
    2557  *
    2558  * Intel CPUs don't write a whole dword, hence the special function.
    2559  */
    2560 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    2561 {
    2562 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2563     iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
    2564 #else
    2565     iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
    2566 #endif
    2567 }
    2568 
    2569 
    2570 /**
    2571  * Used by TB code to store an unsigned 64-bit value onto a flat stack.
    2572  */
    2573 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
    2574 {
    2575 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2576     iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
    2577 #else
    2578     iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
    2579 #endif
    2580 }
    2581 
    2582 
    2583 /**
    2584  * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
    2585  */
    2586 IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2587 {
    2588 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    2589     return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
    2590 #else
    2591     return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
    2592 #endif
    2593 }
    2594 
    2595 
    2596 /**
    2597  * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
    2598  */
    2599 IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2600 {
    2601 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    2602     return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
    2603 #else
    2604     return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
    2605 #endif
    2606 }
    2607 
    2608 
    2609 /**
    2610  * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
    2611  */
    2612 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2613 {
    2614 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    2615     return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
    2616 #else
    2617     return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
    2618 #endif
    2619 }
    2620 
    2621 
    2622 
    2623 /*********************************************************************************************************************************
    2624 *   Helpers: Segmented memory mapping.                                                                                           *
    2625 *********************************************************************************************************************************/
    2626 
    2627 /**
    2628  * Used by TB code to map unsigned 8-bit data for atomic read-write w/
    2629  * segmentation.
    2630  */
    2631 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2632                                                                    RTGCPTR GCPtrMem, uint8_t iSegReg))
    2633 {
    2634 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2635     return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2636 #else
    2637     return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2638 #endif
    2639 }
    2640 
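
Note: these mapping helpers all follow the same shape: they return a host pointer into guest memory and fill in *pbUnmapInfo with a token that the generated code later hands back to the corresponding commit-and-unmap helper (not part of this hunk). A self-contained sketch of that token pattern; all names here are hypothetical, not VBox APIs:

    #include <stdint.h>
    #include <stdio.h>

    static uint8_t g_abGuestMem[16]; /* stand-in for mapped guest memory */

    static uint8_t *mapDataU8(uint8_t *pbUnmapInfo, unsigned off)
    {
        *pbUnmapInfo = 1;        /* token the caller hands back on unmap */
        return &g_abGuestMem[off];
    }

    static void unmapData(uint8_t bUnmapInfo)
    {
        (void)bUnmapInfo;        /* would look up and release the mapping */
    }

    int main(void)
    {
        uint8_t  bUnmapInfo;
        uint8_t *pb = mapDataU8(&bUnmapInfo, 4);
        *pb = 0xa5;              /* operate on the mapped byte */
        unmapData(bUnmapInfo);
        printf("%#x\n", g_abGuestMem[4]);
        return 0;
    }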
    2641 
    2642 /**
    2643  * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
    2644  */
    2645 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2646                                                                RTGCPTR GCPtrMem, uint8_t iSegReg))
    2647 {
    2648 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2649     return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2650 #else
    2651     return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2652 #endif
    2653 }
    2654 
    2655 
    2656 /**
    2657  * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
    2658  */
    2659 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2660                                                                RTGCPTR GCPtrMem, uint8_t iSegReg))
    2661 {
    2662 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2663     return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2664 #else
    2665     return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2666 #endif
    2667 }
    2668 
    2669 
    2670 /**
    2671  * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
    2672  */
    2673 IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2674                                                                      RTGCPTR GCPtrMem, uint8_t iSegReg))
    2675 {
    2676 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2677     return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2678 #else
    2679     return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2680 #endif
    2681 }
    2682 
    2683 
    2684 /**
    2685  * Used by TB code to map unsigned 16-bit data for atomic read-write w/
    2686  * segmentation.
    2687  */
    2688 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2689                                                                      RTGCPTR GCPtrMem, uint8_t iSegReg))
    2690 {
    2691 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2692     return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2693 #else
    2694     return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2695 #endif
    2696 }
    2697 
    2698 
    2699 /**
    2700  * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
    2701  */
    2702 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2703                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2704 {
    2705 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2706     return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2707 #else
    2708     return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2709 #endif
    2710 }
    2711 
    2712 
    2713 /**
    2714  * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
    2715  */
    2716 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2717                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2718 {
    2719 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2720     return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2721 #else
    2722     return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2723 #endif
    2724 }
    2725 
    2726 
    2727 /**
    2728  * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
    2729  */
    2730 IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2731                                                                        RTGCPTR GCPtrMem, uint8_t iSegReg))
    2732 {
    2733 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2734     return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2735 #else
    2736     return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2737 #endif
    2738 }
    2739 
    2740 
    2741 /**
    2742  * Used by TB code to map unsigned 32-bit data for atomic read-write w/
    2743  * segmentation.
    2744  */
    2745 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2746                                                                      RTGCPTR GCPtrMem, uint8_t iSegReg))
    2747 {
    2748 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2749     return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2750 #else
    2751     return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2752 #endif
    2753 }
    2754 
    2755 
    2756 /**
    2757  * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
    2758  */
    2759 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2760                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2761 {
    2762 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2763     return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2764 #else
    2765     return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2766 #endif
    2767 }
    2768 
    2769 
    2770 /**
    2771  * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
    2772  */
    2773 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2774                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2775 {
    2776 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2777     return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2778 #else
    2779     return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2780 #endif
    2781 }
    2782 
    2783 
    2784 /**
    2785  * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
    2786  */
    2787 IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2788                                                                        RTGCPTR GCPtrMem, uint8_t iSegReg))
    2789 {
    2790 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2791     return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2792 #else
    2793     return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2794 #endif
    2795 }
    2796 
    2797 
    2798 /**
    2799  * Used by TB code to map unsigned 64-bit data for atomic read-write w/
    2800  * segmentation.
    2801  */
    2802 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2803                                                                      RTGCPTR GCPtrMem, uint8_t iSegReg))
    2804 {
    2805 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2806     return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2807 #else
    2808     return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2809 #endif
    2810 }
    2811 
    2812 
    2813 /**
    2814  * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
    2815  */
    2816 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2817                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2818 {
    2819 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2820     return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2821 #else
    2822     return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2823 #endif
    2824 }
    2825 
    2826 
    2827 /**
    2828  * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
    2829  */
    2830 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2831                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2832 {
    2833 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2834     return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2835 #else
    2836     return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2837 #endif
    2838 }
    2839 
    2840 
    2841 /**
    2842  * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
    2843  */
    2844 IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2845                                                                        RTGCPTR GCPtrMem, uint8_t iSegReg))
    2846 {
    2847 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2848     return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2849 #else
    2850     return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2851 #endif
    2852 }
    2853 
    2854 
    2855 /**
    2856  * Used by TB code to map 80-bit float data writeonly w/ segmentation.
    2857  */
    2858 IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2859                                                                    RTGCPTR GCPtrMem, uint8_t iSegReg))
    2860 {
    2861 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2862     return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2863 #else
    2864     return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2865 #endif
    2866 }
    2867 
    2868 
    2869 /**
    2870  * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
    2871  */
    2872 IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2873                                                                   RTGCPTR GCPtrMem, uint8_t iSegReg))
    2874 {
    2875 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2876     return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2877 #else
    2878     return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2879 #endif
    2880 }
    2881 
    2882 
    2883 /**
    2884  * Used by TB code to map unsigned 128-bit data for atomic read-write w/
    2885  * segmentation.
    2886  */
    2887 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2888                                                                         RTGCPTR GCPtrMem, uint8_t iSegReg))
    2889 {
    2890 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2891     return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2892 #else
    2893     return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2894 #endif
    2895 }
    2896 
    2897 
    2898 /**
    2899  * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
    2900  */
    2901 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2902                                                                     RTGCPTR GCPtrMem, uint8_t iSegReg))
    2903 {
    2904 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2905     return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2906 #else
    2907     return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2908 #endif
    2909 }
    2910 
    2911 
    2912 /**
    2913  * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
    2914  */
    2915 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2916                                                                     RTGCPTR GCPtrMem, uint8_t iSegReg))
    2917 {
    2918 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2919     return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2920 #else
    2921     return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2922 #endif
    2923 }
    2924 
    2925 
    2926 /**
    2927  * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
    2928  */
    2929 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2930                                                                           RTGCPTR GCPtrMem, uint8_t iSegReg))
    2931 {
    2932 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2933     return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2934 #else
    2935     return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2936 #endif
    2937 }
    2938 
    2939 
    2940 /*********************************************************************************************************************************
    2941 *   Helpers: Flat memory mapping.                                                                                                *
    2942 *********************************************************************************************************************************/
    2943 
    2944 /**
    2945  * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
    2946  * address.
    2947  */
    2948 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2949 {
    2950 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2951     return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2952 #else
    2953     return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2954 #endif
    2955 }
    2956 
    2957 
    2958 /**
    2959  * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
    2960  */
    2961 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2962 {
    2963 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2964     return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2965 #else
    2966     return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2967 #endif
    2968 }
    2969 
    2970 
    2971 /**
    2972  * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
    2973  */
    2974 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2975 {
    2976 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2977     return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2978 #else
    2979     return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2980 #endif
    2981 }
    2982 
    2983 
    2984 /**
    2985  * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
    2986  */
    2987 IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2988 {
    2989 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2990     return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2991 #else
    2992     return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2993 #endif
    2994 }
    2995 
    2996 
    2997 /**
    2998  * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
    2999  * address.
    3000  */
    3001 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3002 {
    3003 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3004     return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3005 #else
    3006     return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3007 #endif
    3008 }
    3009 
    3010 
    3011 /**
    3012  * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
    3013  */
    3014 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3015 {
    3016 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3017     return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3018 #else
    3019     return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3020 #endif
    3021 }
    3022 
    3023 
    3024 /**
    3025  * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
    3026  */
    3027 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3028 {
    3029 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3030     return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3031 #else
    3032     return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3033 #endif
    3034 }
    3035 
    3036 
    3037 /**
    3038  * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
    3039  */
    3040 IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3041 {
    3042 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3043     return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3044 #else
    3045     return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3046 #endif
    3047 }
    3048 
    3049 
    3050 /**
    3051  * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
    3052  * address.
    3053  */
    3054 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3055 {
    3056 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3057     return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3058 #else
    3059     return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3060 #endif
    3061 }
    3062 
    3063 
    3064 /**
    3065  * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
    3066  */
    3067 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3068 {
    3069 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3070     return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3071 #else
    3072     return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3073 #endif
    3074 }
    3075 
    3076 
    3077 /**
    3078  * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
    3079  */
    3080 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3081 {
    3082 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3083     return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3084 #else
    3085     return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3086 #endif
    3087 }
    3088 
    3089 
    3090 /**
    3091  * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
    3092  */
    3093 IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3094 {
    3095 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3096     return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3097 #else
    3098     return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3099 #endif
    3100 }
    3101 
    3102 
    3103 /**
    3104  * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
    3105  * address.
    3106  */
    3107 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3108 {
    3109 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3110     return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3111 #else
    3112     return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3113 #endif
    3114 }
    3115 
    3116 
    3117 /**
    3118  * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
    3119  */
    3120 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3121 {
    3122 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3123     return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3124 #else
    3125     return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3126 #endif
    3127 }
    3128 
    3129 
    3130 /**
    3131  * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
    3132  */
    3133 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3134 {
    3135 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3136     return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3137 #else
    3138     return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3139 #endif
    3140 }
    3141 
    3142 
    3143 /**
    3144  * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
    3145  */
    3146 IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3147 {
    3148 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3149     return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3150 #else
    3151     return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3152 #endif
    3153 }
    3154 
    3155 
    3156 /**
    3157  * Used by TB code to map 80-bit float data writeonly w/ flat address.
    3158  */
    3159 IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3160 {
    3161 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3162     return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3163 #else
    3164     return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3165 #endif
    3166 }
    3167 
    3168 
    3169 /**
    3170  * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
    3171  */
    3172 IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3173 {
    3174 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3175     return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3176 #else
    3177     return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3178 #endif
    3179 }
    3180 
    3181 
    3182 /**
    3183  * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
    3184  * address.
    3185  */
    3186 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3187 {
    3188 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3189     return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3190 #else
    3191     return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3192 #endif
    3193 }
    3194 
    3195 
    3196 /**
    3197  * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
    3198  */
    3199 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3200 {
    3201 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3202     return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3203 #else
    3204     return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3205 #endif
    3206 }
    3207 
    3208 
    3209 /**
    3210  * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
    3211  */
    3212 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3213 {
    3214 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3215     return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3216 #else
    3217     return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3218 #endif
    3219 }
    3220 
    3221 
    3222 /**
    3223  * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
    3224  */
    3225 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    3226 {
    3227 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    3228     return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    3229 #else
    3230     return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    3231 #endif
    3232 }
    3233 
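/*
 * Note: In the IEMNATIVE_WITH_TLB_LOOKUP_MAPPED configuration the flat helpers
 * above reuse the segmented 'safe' workers and pass UINT8_MAX as the segment
 * register index to signal that GCPtrMem is already a flat (linear) address.
 */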
    3234 
    3235 /*********************************************************************************************************************************
    3236 *   Helpers: Commit, rollback & unmap                                                                                            *
    3237 *********************************************************************************************************************************/
    3238 
    3239 /**
    3240  * Used by TB code to commit and unmap an atomic read-write memory mapping.
    3241  */
    3242 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    3243 {
    3244     return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
    3245 }
    3246 
    3247 
    3248 /**
    3249  * Used by TB code to commit and unmap a read-write memory mapping.
    3250  */
    3251 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    3252 {
    3253     return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
    3254 }
    3255 
    3256 
    3257 /**
    3258  * Used by TB code to commit and unmap a write-only memory mapping.
    3259  */
    3260 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    3261 {
    3262     return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
    3263 }
    3264 
    3265 
    3266 /**
    3267  * Used by TB code to commit and unmap a read-only memory mapping.
    3268  */
    3269 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    3270 {
    3271     return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
    3272 }
    3273 
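/*
 * Illustrative sketch (not part of the build): how generated TB code is
 * expected to pair a mapping helper with the matching commit helper for a
 * write.  The surrounding glue is hypothetical; only the two helper names
 * come from this file.
 *
 *     uint8_t   bUnmapInfo;
 *     uint64_t *pu64Dst = iemNativeHlpMemFlatMapDataU64Wo(pVCpu, &bUnmapInfo, GCPtrMem);
 *     *pu64Dst = u64Value;                                // store through the mapping
 *     iemNativeHlpMemCommitAndUnmapWo(pVCpu, bUnmapInfo); // commit and release it
 */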
    3274 
    3275 /**
    3276  * Reinitializes the native recompiler state.
    3277  *
    3278  * Called before starting a new recompile job.
    3279  */
    3280 static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
    3281 {
    3282     pReNative->cLabels                     = 0;
    3283     pReNative->bmLabelTypes                = 0;
    3284     pReNative->cFixups                     = 0;
    3285 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3286     pReNative->pDbgInfo->cEntries          = 0;
    3287     pReNative->pDbgInfo->offNativeLast     = UINT32_MAX;
    3288 #endif
    3289     pReNative->pTbOrg                      = pTb;
    3290     pReNative->cCondDepth                  = 0;
    3291     pReNative->uCondSeqNo                  = 0;
    3292     pReNative->uCheckIrqSeqNo              = 0;
    3293     pReNative->uTlbSeqNo                   = 0;
    3294 
    3295 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    3296     pReNative->Core.offPc                  = 0;
    3297     pReNative->Core.cInstrPcUpdateSkipped  = 0;
    3298 #endif
    3299 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    3300     pReNative->fSimdRaiseXcptChecksEmitted = 0;
    3301 #endif
    3302     pReNative->Core.bmHstRegs              = IEMNATIVE_REG_FIXED_MASK
    3303 #if IEMNATIVE_HST_GREG_COUNT < 32
    3304                                            | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
    3305 #endif
    3306                                            ;
    3307     pReNative->Core.bmHstRegsWithGstShadow = 0;
    3308     pReNative->Core.bmGstRegShadows        = 0;
    3309 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    3310     pReNative->Core.bmGstRegShadowDirty    = 0;
    3311 #endif
    3312     pReNative->Core.bmVars                 = 0;
    3313     pReNative->Core.bmStack                = 0;
    3314     AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
    3315     pReNative->Core.u64ArgVars             = UINT64_MAX;
    3316 
    3317     AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 17);
    3318     pReNative->aidxUniqueLabels[0]         = UINT32_MAX;
    3319     pReNative->aidxUniqueLabels[1]         = UINT32_MAX;
    3320     pReNative->aidxUniqueLabels[2]         = UINT32_MAX;
    3321     pReNative->aidxUniqueLabels[3]         = UINT32_MAX;
    3322     pReNative->aidxUniqueLabels[4]         = UINT32_MAX;
    3323     pReNative->aidxUniqueLabels[5]         = UINT32_MAX;
    3324     pReNative->aidxUniqueLabels[6]         = UINT32_MAX;
    3325     pReNative->aidxUniqueLabels[7]         = UINT32_MAX;
    3326     pReNative->aidxUniqueLabels[8]         = UINT32_MAX;
    3327     pReNative->aidxUniqueLabels[9]         = UINT32_MAX;
    3328     pReNative->aidxUniqueLabels[10]        = UINT32_MAX;
    3329     pReNative->aidxUniqueLabels[11]        = UINT32_MAX;
    3330     pReNative->aidxUniqueLabels[12]        = UINT32_MAX;
    3331     pReNative->aidxUniqueLabels[13]        = UINT32_MAX;
    3332     pReNative->aidxUniqueLabels[14]        = UINT32_MAX;
    3333     pReNative->aidxUniqueLabels[15]        = UINT32_MAX;
    3334     pReNative->aidxUniqueLabels[16]        = UINT32_MAX;
    3335 
    3336     /* Full host register reinit: */
    3337     for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
    3338     {
    3339         pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
    3340         pReNative->Core.aHstRegs[i].enmWhat        = kIemNativeWhat_Invalid;
    3341         pReNative->Core.aHstRegs[i].idxVar         = UINT8_MAX;
    3342     }
    3343 
    3344     uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
    3345                    & ~(  RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
    3346 #ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
    3347                        | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
    3348 #endif
    3349 #ifdef IEMNATIVE_REG_FIXED_TMP0
    3350                        | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
    3351 #endif
    3352 #ifdef IEMNATIVE_REG_FIXED_TMP1
    3353                        | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
    3354 #endif
    3355 #ifdef IEMNATIVE_REG_FIXED_PC_DBG
    3356                        | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
    3357 #endif
    3358                       );
    3359     for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
    3360     {
    3361         fRegs &= ~RT_BIT_32(idxReg);
    3362         pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
    3363     }
    3364 
    3365     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat     = kIemNativeWhat_pVCpuFixed;
    3366 #ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
    3367     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat   = kIemNativeWhat_pCtxFixed;
    3368 #endif
    3369 #ifdef IEMNATIVE_REG_FIXED_TMP0
    3370     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat       = kIemNativeWhat_FixedTmp;
    3371 #endif
    3372 #ifdef IEMNATIVE_REG_FIXED_TMP1
    3373     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat       = kIemNativeWhat_FixedTmp;
    3374 #endif
    3375 #ifdef IEMNATIVE_REG_FIXED_PC_DBG
    3376     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat     = kIemNativeWhat_PcShadow;
    3377 #endif
    3378 
    3379 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    3380     pReNative->Core.bmHstSimdRegs          = IEMNATIVE_SIMD_REG_FIXED_MASK
    3381 # if IEMNATIVE_HST_SIMD_REG_COUNT < 32
    3382                                            | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
    3383 # endif
    3384                                            ;
    3385     pReNative->Core.bmHstSimdRegsWithGstShadow   = 0;
    3386     pReNative->Core.bmGstSimdRegShadows          = 0;
    3387     pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
    3388     pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
    3389 
    3390     /* Full host SIMD register reinit: */
    3391     for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
    3392     {
    3393         pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
    3394         pReNative->Core.aHstSimdRegs[i].enmWhat        = kIemNativeWhat_Invalid;
    3395         pReNative->Core.aHstSimdRegs[i].idxVar         = UINT8_MAX;
    3396         pReNative->Core.aHstSimdRegs[i].enmLoaded      = kIemNativeGstSimdRegLdStSz_Invalid;
    3397     }
    3398 
    3399     fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
    3400     for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
    3401     {
    3402         fRegs &= ~RT_BIT_32(idxReg);
    3403         pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
    3404     }
    3405 
    3406 # ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
    3407     pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
    3408 # endif
    3409 
    3410 #endif
    3411 
    3412     return pReNative;
    3413 }
    3414 
    3415 
    3416 /**
    3417  * Allocates and initializes the native recompiler state.
    3418  *
    3419  * This is called the first time an EMT wants to recompile something.
    3420  *
    3421  * @returns Pointer to the new recompiler state.
    3422  * @param   pVCpu   The cross context virtual CPU structure of the calling
    3423  *                  thread.
    3424  * @param   pTb     The TB that's about to be recompiled.
    3425  * @thread  EMT(pVCpu)
    3426  */
    3427 static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
    3428 {
    3429     VMCPU_ASSERT_EMT(pVCpu);
    3430 
    3431     PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
    3432     AssertReturn(pReNative, NULL);
    3433 
    3434     /*
    3435      * Try allocate all the buffers and stuff we need.
    3436      */
    3437     pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
    3438     pReNative->paLabels  = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
    3439     pReNative->paFixups  = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
    3440 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3441     pReNative->pDbgInfo  = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
    3442 #endif
    3443     if (RT_LIKELY(   pReNative->pInstrBuf
    3444                   && pReNative->paLabels
    3445                   && pReNative->paFixups)
    3446 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3447         && pReNative->pDbgInfo
    3448 #endif
    3449        )
    3450     {
    3451         /*
    3452          * Set the buffer & array sizes on success.
    3453          */
    3454         pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
    3455         pReNative->cLabelsAlloc   = _8K;
    3456         pReNative->cFixupsAlloc   = _16K;
    3457 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3458         pReNative->cDbgInfoAlloc  = _16K;
    3459 #endif
    3460 
    3461         /* Other constant stuff: */
    3462         pReNative->pVCpu          = pVCpu;
    3463 
    3464         /*
    3465          * Done, just need to save it and reinit it.
    3466          */
    3467         pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
    3468         return iemNativeReInit(pReNative, pTb);
    3469     }
    3470 
    3471     /*
    3472      * Failed. Cleanup and return.
    3473      */
    3474     AssertFailed();
    3475     RTMemFree(pReNative->pInstrBuf);
    3476     RTMemFree(pReNative->paLabels);
    3477     RTMemFree(pReNative->paFixups);
    3478 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3479     RTMemFree(pReNative->pDbgInfo);
    3480 #endif
    3481     RTMemFree(pReNative);
    3482     return NULL;
    3483 }
    3484 
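/*
 * Sketch of the expected caller pattern (hypothetical wrapper, for
 * illustration only): reuse the per-EMT state when it exists, otherwise
 * allocate it on first use.
 *
 *     PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *     if (RT_LIKELY(pReNative))
 *         pReNative = iemNativeReInit(pReNative, pTb);
 *     else
 *         pReNative = iemNativeInit(pVCpu, pTb);  // saves itself in pVCpu on success
 *     AssertReturn(pReNative, NULL);
 */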
    3485 
    3486 /**
    3487  * Creates a label
    3488  * Creates a label.
    3489  * If the label does not yet have a defined position,
    3490  * call iemNativeLabelDefine() later to set it.
    3491  *
    3492  * @returns Label ID. Throws VBox status code on failure, so no need to check
    3493  *          the return value.
    3494  * @param   pReNative   The native recompile state.
    3495  * @param   enmType     The label type.
    3496  * @param   offWhere    The instruction offset of the label.  UINT32_MAX if the
    3497  *                      label is not yet defined (default).
    3498  * @param   uData       Data associated with the label. Only applicable to
    3499  *                      certain types of labels. Default is zero.
    3500  */
    3501 DECL_HIDDEN_THROW(uint32_t)
    3502 iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
    3503                      uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
    3504 {
    3505     Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
    3506 
    3507     /*
    3508      * Locate existing label definition.
    3509      *
    3510      * This is only allowed for forward declarations where offWhere=UINT32_MAX
    3511      * and uData is zero.
    3512      */
    3513     PIEMNATIVELABEL paLabels = pReNative->paLabels;
    3514     uint32_t const  cLabels  = pReNative->cLabels;
    3515     if (   pReNative->bmLabelTypes & RT_BIT_64(enmType)
    3516 #ifndef VBOX_STRICT
    3517         && enmType  <  kIemNativeLabelType_FirstWithMultipleInstances
    3518         && offWhere == UINT32_MAX
    3519         && uData    == 0
    3520 #endif
    3521         )
    3522     {
    3523 #ifndef VBOX_STRICT
    3524         AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
    3525                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    3526         uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
    3527         if (idxLabel < pReNative->cLabels)
    3528             return idxLabel;
    3529 #else
    3530         for (uint32_t i = 0; i < cLabels; i++)
    3531             if (   paLabels[i].enmType == enmType
    3532                 && paLabels[i].uData   == uData)
    3533             {
    3534                 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    3535                 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    3536                 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
    3537                 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
    3538                            IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    3539                 return i;
    3540             }
    3541         AssertStmt(   enmType >= kIemNativeLabelType_FirstWithMultipleInstances
    3542                    || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    3543 #endif
    3544     }
    3545 
    3546     /*
    3547      * Make sure we've got room for another label.
    3548      */
    3549     if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
    3550     { /* likely */ }
    3551     else
    3552     {
    3553         uint32_t cNew = pReNative->cLabelsAlloc;
    3554         AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
    3555         AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
    3556         cNew *= 2;
    3557         AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* the IEMNATIVEFIXUP::idxLabel type restricts this */
    3558         paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
    3559         AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
    3560         pReNative->paLabels     = paLabels;
    3561         pReNative->cLabelsAlloc = cNew;
    3562     }
    3563 
    3564     /*
    3565      * Define a new label.
    3566      */
    3567     paLabels[cLabels].off     = offWhere;
    3568     paLabels[cLabels].enmType = enmType;
    3569     paLabels[cLabels].uData   = uData;
    3570     pReNative->cLabels = cLabels + 1;
    3571 
    3572     Assert((unsigned)enmType < 64);
    3573     pReNative->bmLabelTypes |= RT_BIT_64(enmType);
    3574 
    3575     if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
    3576     {
    3577         Assert(uData == 0);
    3578         pReNative->aidxUniqueLabels[enmType] = cLabels;
    3579     }
    3580 
    3581     if (offWhere != UINT32_MAX)
    3582     {
    3583 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3584         iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
    3585         iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
    3586 #endif
    3587     }
    3588     return cLabels;
    3589 }
    3590 
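/*
 * Usage sketch (illustration only; the label type is an example from the
 * IEMNATIVELABELTYPE enum, everything else is hypothetical): a label may be
 * created before its position is known and pinned down later.
 *
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return);
 *     // ... emit code that branches to idxLabel via iemNativeAddFixup() ...
 *     iemNativeLabelDefine(pReNative, idxLabel, off);   // 'off' = current instruction offset
 */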
    3591 
    3592 /**
    3593  * Defines the location of an existing label.
    3594  *
    3595  * @param   pReNative   The native recompile state.
    3596  * @param   idxLabel    The label to define.
    3597  * @param   offWhere    The position.
    3598  */
    3599 DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
    3600 {
    3601     AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
    3602     PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
    3603     AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
    3604     pLabel->off = offWhere;
    3605 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3606     iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
    3607     iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
    3608 #endif
    3609 }
    3610 
    3611 
    3612 /**
    3613  * Looks up a label.
    3614  *
    3615  * @returns Label ID if found, UINT32_MAX if not.
    3616  */
    3617 static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
    3618                                    uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
    3619 {
    3620     Assert((unsigned)enmType < 64);
    3621     if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
    3622     {
    3623         if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
    3624             return pReNative->aidxUniqueLabels[enmType];
    3625 
    3626         PIEMNATIVELABEL paLabels = pReNative->paLabels;
    3627         uint32_t const  cLabels  = pReNative->cLabels;
    3628         for (uint32_t i = 0; i < cLabels; i++)
    3629             if (   paLabels[i].enmType == enmType
    3630                 && paLabels[i].uData   == uData
    3631                 && (   paLabels[i].off == offWhere
    3632                     || offWhere        == UINT32_MAX
    3633                     || paLabels[i].off == UINT32_MAX))
    3634                 return i;
    3635     }
    3636     return UINT32_MAX;
    3637 }
    3638 
    3639 
    3640 /**
    3641  * Adds a fixup.
    3642  *
    3643  * @throws  VBox status code (int) on failure.
    3644  * @param   pReNative   The native recompile state.
    3645  * @param   offWhere    The instruction offset of the fixup location.
    3646  * @param   idxLabel    The target label ID for the fixup.
    3647  * @param   enmType     The fixup type.
    3648  * @param   offAddend   Fixup addend if applicable to the type. Default is 0.
    3649  */
    3650 DECL_HIDDEN_THROW(void)
    3651 iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
    3652                   IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
    3653 {
    3654     Assert(idxLabel <= UINT16_MAX);
    3655     Assert((unsigned)enmType <= UINT8_MAX);
    3656 #ifdef RT_ARCH_ARM64
    3657     AssertStmt(   enmType != kIemNativeFixupType_RelImm14At5
    3658                || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
    3659                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
    3660 #endif
    3661 
    3662     /*
    3663      * Make sure we've room.
    3664      */
    3665     PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
    3666     uint32_t const  cFixups  = pReNative->cFixups;
    3667     if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
    3668     { /* likely */ }
    3669     else
    3670     {
    3671         uint32_t cNew = pReNative->cFixupsAlloc;
    3672         AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
    3673         AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
    3674         cNew *= 2;
    3675         AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
    3676         paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
    3677         AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
    3678         pReNative->paFixups     = paFixups;
    3679         pReNative->cFixupsAlloc = cNew;
    3680     }
    3681 
    3682     /*
    3683      * Add the fixup.
    3684      */
    3685     paFixups[cFixups].off       = offWhere;
    3686     paFixups[cFixups].idxLabel  = (uint16_t)idxLabel;
    3687     paFixups[cFixups].enmType   = enmType;
    3688     paFixups[cFixups].offAddend = offAddend;
    3689     pReNative->cFixups = cFixups + 1;
    3690 }
    3691 
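/*
 * Illustrative sketch of how a fixup ties a branch site to a label; the
 * fixup type and addend below are examples only (the actual types live in
 * IEMN8veRecompiler.h):
 *
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX, uCondSeqNo);
 *     // ... emit the branch instruction with a placeholder displacement at 'off' ...
 *     iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
 *
 * The final code emission pass walks paFixups[] and patches each recorded
 * location once the target label offset is known.
 */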
    3692 
    3693 /**
    3694  * Slow code path for iemNativeInstrBufEnsure.
    3695  */
    3696 DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
    3697 {
    3698     /* Double the buffer size till we meet the request. */
    3699     uint32_t cNew = pReNative->cInstrBufAlloc;
    3700     AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
    3701     do
    3702         cNew *= 2;
    3703     while (cNew < off + cInstrReq);
    3704 
    3705     uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
    3706 #ifdef RT_ARCH_ARM64
    3707     uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
    3708 #else
    3709     uint32_t const cbMaxInstrBuf = _2M;
    3710 #endif
    3711     AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
    3712 
    3713     void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
    3714     AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
    3715 
    3716 #ifdef VBOX_STRICT
    3717     pReNative->offInstrBufChecked = off + cInstrReq;
    3718 #endif
    3719     pReNative->cInstrBufAlloc     = cNew;
    3720     return pReNative->pInstrBuf   = (PIEMNATIVEINSTR)pvNew;
    3721 }
    3722 
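/*
 * For reference, the fast path is an inline wrapper in the header;
 * conceptually it looks like this (sketch, not the authoritative
 * definition):
 *
 *     DECL_INLINE_THROW(PIEMNATIVEINSTR)
 *     iemNativeInstrBufEnsure(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
 *     {
 *         if (RT_LIKELY(off + (uint64_t)cInstrReq <= pReNative->cInstrBufAlloc))
 *             return pReNative->pInstrBuf;
 *         return iemNativeInstrBufEnsureSlow(pReNative, off, cInstrReq);
 *     }
 */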
    3723 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3724 
    3725 /**
    3726  * Grows the static debug info array used during recompilation.
    3727  *
    3728  * @returns Pointer to the new debug info block; throws VBox status code on
    3729  *          failure, so no need to check the return value.
    3730  */
    3731 DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
    3732 {
    3733     uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
    3734     AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
    3735     pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
    3736     AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
    3737     pReNative->pDbgInfo      = pDbgInfo;
    3738     pReNative->cDbgInfoAlloc = cNew;
    3739     return pDbgInfo;
    3740 }
    3741 
    3742 
    3743 /**
    3744  * Adds a new debug info uninitialized entry, returning the pointer to it.
    3745  */
    3746 DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
    3747 {
    3748     if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
    3749     { /* likely */ }
    3750     else
    3751         pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
    3752     return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
    3753 }
    3754 
    3755 
    3756 /**
    3757  * Debug Info: Adds a native offset record, if necessary.
    3758  */
    3759 DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    3760 {
    3761     PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
    3762 
    3763     /*
    3764      * Do we need this one?
    3765      */
    3766     uint32_t const offPrev = pDbgInfo->offNativeLast;
    3767     if (offPrev == off)
    3768         return;
    3769     AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
    3770 
    3771     /*
    3772      * Add it.
    3773      */
    3774     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
    3775     pEntry->NativeOffset.uType     = kIemTbDbgEntryType_NativeOffset;
    3776     pEntry->NativeOffset.offNative = off;
    3777     pDbgInfo->offNativeLast = off;
    3778 }
    3779 
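/*
 * Convention note: payload entries are preceded by a native offset record,
 * as seen in iemNativeLabelCreate() and iemNativeLabelDefine() above:
 *
 *     iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
 *     iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
 *
 * This way each payload entry can be mapped back to a position in the
 * generated code without storing an offset in every entry.
 */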
    3780 
    3781 /**
    3782  * Debug Info: Record info about a label.
    3783  */
    3784 static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
    3785 {
    3786     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3787     pEntry->Label.uType    = kIemTbDbgEntryType_Label;
    3788     pEntry->Label.uUnused  = 0;
    3789     pEntry->Label.enmLabel = (uint8_t)enmType;
    3790     pEntry->Label.uData    = uData;
    3791 }
    3792 
    3793 
    3794 /**
    3795  * Debug Info: Record info about a threaded call.
    3796  */
    3797 static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
    3798 {
    3799     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3800     pEntry->ThreadedCall.uType       = kIemTbDbgEntryType_ThreadedCall;
    3801     pEntry->ThreadedCall.fRecompiled = fRecompiled;
    3802     pEntry->ThreadedCall.uUnused     = 0;
    3803     pEntry->ThreadedCall.enmCall     = (uint16_t)enmCall;
    3804 }
    3805 
    3806 
    3807 /**
    3808  * Debug Info: Record info about a new guest instruction.
    3809  */
    3810 static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
    3811 {
    3812     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3813     pEntry->GuestInstruction.uType   = kIemTbDbgEntryType_GuestInstruction;
    3814     pEntry->GuestInstruction.uUnused = 0;
    3815     pEntry->GuestInstruction.fExec   = fExec;
    3816 }
    3817 
    3818 
    3819 /**
    3820  * Debug Info: Record info about guest register shadowing.
    3821  */
    3822 DECL_HIDDEN_THROW(void)
    3823 iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
    3824                                      uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
    3825 {
    3826     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3827     pEntry->GuestRegShadowing.uType         = kIemTbDbgEntryType_GuestRegShadowing;
    3828     pEntry->GuestRegShadowing.uUnused       = 0;
    3829     pEntry->GuestRegShadowing.idxGstReg     = enmGstReg;
    3830     pEntry->GuestRegShadowing.idxHstReg     = idxHstReg;
    3831     pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
    3832 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    3833     Assert(   idxHstReg != UINT8_MAX
    3834            || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
    3835 #endif
    3836 }
    3837 
    3838 
    3839 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    3840 /**
    3841  * Debug Info: Record info about guest SIMD register shadowing.
    3842  */
    3843 DECL_HIDDEN_THROW(void)
    3844 iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
    3845                                          uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
    3846 {
    3847     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3848     pEntry->GuestSimdRegShadowing.uType             = kIemTbDbgEntryType_GuestSimdRegShadowing;
    3849     pEntry->GuestSimdRegShadowing.uUnused           = 0;
    3850     pEntry->GuestSimdRegShadowing.idxGstSimdReg     = enmGstSimdReg;
    3851     pEntry->GuestSimdRegShadowing.idxHstSimdReg     = idxHstSimdReg;
    3852     pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
    3853 }
    3854 # endif
    3855 
    3856 
    3857 # ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    3858 /**
    3859  * Debug Info: Record info about delayed RIP updates.
    3860  */
    3861 DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
    3862 {
    3863     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3864     pEntry->DelayedPcUpdate.uType         = kIemTbDbgEntryType_DelayedPcUpdate;
    3865     pEntry->DelayedPcUpdate.offPc         = offPc;
    3866     pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
    3867 }
    3868 # endif
    3869 
    3870 # if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
    3871 
    3872 /**
    3873  * Debug Info: Record info about a dirty guest register.
    3874  */
    3875 DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
    3876                                                          uint8_t idxGstReg, uint8_t idxHstReg)
    3877 {
    3878     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3879     pEntry->GuestRegDirty.uType         = kIemTbDbgEntryType_GuestRegDirty;
    3880     pEntry->GuestRegDirty.fSimdReg      = fSimdReg ? 1 : 0;
    3881     pEntry->GuestRegDirty.idxGstReg     = idxGstReg;
    3882     pEntry->GuestRegDirty.idxHstReg     = idxHstReg;
    3883 }
    3884 
    3885 
    3886 /**
    3887  * Debug Info: Record info about a dirty guest register writeback operation.
    3888  */
    3889 DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
    3890 {
    3891     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3892     pEntry->GuestRegWriteback.uType         = kIemTbDbgEntryType_GuestRegWriteback;
    3893     pEntry->GuestRegWriteback.fSimdReg      = fSimdReg ? 1 : 0;
    3894     pEntry->GuestRegWriteback.fGstReg       = (uint32_t)fGstReg;
    3895     /** @todo r=aeichner Can't fit the whole register mask in the debug info entry, deal with it when it becomes necessary. */
    3896     Assert((uint64_t)pEntry->GuestRegWriteback.fGstReg == fGstReg);
    3897 }
    3898 
    3899 # endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
    3900 
    3901 #endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
    3902 
    3903 
    3904 /*********************************************************************************************************************************
    3905 *   Register Allocator                                                                                                           *
    3906 *********************************************************************************************************************************/
    3907 
    3908 /**
    3909  * Register parameter indexes (indexed by argument number).
    3910  */
    3911 DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
    3912 {
    3913     IEMNATIVE_CALL_ARG0_GREG,
    3914     IEMNATIVE_CALL_ARG1_GREG,
    3915     IEMNATIVE_CALL_ARG2_GREG,
    3916     IEMNATIVE_CALL_ARG3_GREG,
    3917 #if defined(IEMNATIVE_CALL_ARG4_GREG)
    3918     IEMNATIVE_CALL_ARG4_GREG,
    3919 # if defined(IEMNATIVE_CALL_ARG5_GREG)
    3920     IEMNATIVE_CALL_ARG5_GREG,
    3921 #  if defined(IEMNATIVE_CALL_ARG6_GREG)
    3922     IEMNATIVE_CALL_ARG6_GREG,
    3923 #   if defined(IEMNATIVE_CALL_ARG7_GREG)
    3924     IEMNATIVE_CALL_ARG7_GREG,
    3925 #   endif
    3926 #  endif
    3927 # endif
    3928 #endif
    3929 };
    3930 AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
    3931 
    3932 /**
    3933  * Call register masks indexed by argument count.
    3934  */
    3935 DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
    3936 {
    3937     0,
    3938     RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
    3939     RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
    3940     RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
    3941       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3942     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
    3943 #if defined(IEMNATIVE_CALL_ARG4_GREG)
    3944       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3945     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
    3946 # if defined(IEMNATIVE_CALL_ARG5_GREG)
    3947       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3948     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
    3949 #  if defined(IEMNATIVE_CALL_ARG6_GREG)
    3950       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3951     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
    3952     | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
    3953 #   if defined(IEMNATIVE_CALL_ARG7_GREG)
    3954       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3955     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
    3956     | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
    3957 #   endif
    3958 #  endif
    3959 # endif
    3960 #endif
    3961 };
    3962 
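/*
 * Example use of the two tables above (sketch; iemNativeEmitLoadGprFromGpr
 * is assumed to be the GPR copy emitter from IEMN8veRecompilerEmit.h, the
 * other variables are hypothetical):
 *
 *     // Move the first cArgs argument values into the ABI argument registers.
 *     Assert(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT);
 *     for (uint8_t iArg = 0; iArg < cArgs; iArg++)
 *         off = iemNativeEmitLoadGprFromGpr(pReNative, off, g_aidxIemNativeCallRegs[iArg], aidxSrcRegs[iArg]);
 *     // g_afIemNativeCallRegs[cArgs] now gives the mask of argument registers in use.
 */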
    3963 #ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    3964 /**
    3965  * BP offset of the stack argument slots.
    3966  *
    3967  * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
    3968  * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
    3969  */
    3970 DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
    3971 {
    3972     IEMNATIVE_FP_OFF_STACK_ARG0,
    3973 # ifdef IEMNATIVE_FP_OFF_STACK_ARG1
    3974     IEMNATIVE_FP_OFF_STACK_ARG1,
    3975 # endif
    3976 # ifdef IEMNATIVE_FP_OFF_STACK_ARG2
    3977     IEMNATIVE_FP_OFF_STACK_ARG2,
    3978 # endif
    3979 # ifdef IEMNATIVE_FP_OFF_STACK_ARG3
    3980     IEMNATIVE_FP_OFF_STACK_ARG3,
    3981 # endif
    3982 };
    3983 AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
    3984 #endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
    3985 
    3986 /**
    3987  * Info about shadowed guest register values.
    3988  * @see IEMNATIVEGSTREG
    3989  */
    3990 DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
    3991 {
    3992 #define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
    3993     /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */  { CPUMCTX_OFF_AND_SIZE(rax),                "rax", },
    3994     /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */  { CPUMCTX_OFF_AND_SIZE(rcx),                "rcx", },
    3995     /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */  { CPUMCTX_OFF_AND_SIZE(rdx),                "rdx", },
    3996     /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */  { CPUMCTX_OFF_AND_SIZE(rbx),                "rbx", },
    3997     /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */  { CPUMCTX_OFF_AND_SIZE(rsp),                "rsp", },
    3998     /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */  { CPUMCTX_OFF_AND_SIZE(rbp),                "rbp", },
    3999     /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */  { CPUMCTX_OFF_AND_SIZE(rsi),                "rsi", },
    4000     /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */  { CPUMCTX_OFF_AND_SIZE(rdi),                "rdi", },
    4001     /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */  { CPUMCTX_OFF_AND_SIZE(r8),                 "r8", },
    4002     /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */  { CPUMCTX_OFF_AND_SIZE(r9),                 "r9", },
    4003     /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */  { CPUMCTX_OFF_AND_SIZE(r10),                "r10", },
    4004     /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */  { CPUMCTX_OFF_AND_SIZE(r11),                "r11", },
    4005     /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */  { CPUMCTX_OFF_AND_SIZE(r12),                "r12", },
    4006     /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */  { CPUMCTX_OFF_AND_SIZE(r13),                "r13", },
    4007     /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */  { CPUMCTX_OFF_AND_SIZE(r14),                "r14", },
    4008     /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */  { CPUMCTX_OFF_AND_SIZE(r15),                "r15", },
    4009     /* [kIemNativeGstReg_Pc] = */                       { CPUMCTX_OFF_AND_SIZE(rip),                "rip", },
    4010     /* [kIemNativeGstReg_Cr0] = */                      { CPUMCTX_OFF_AND_SIZE(cr0),                "cr0", },
    4011     /* [kIemNativeGstReg_FpuFcw] = */                   { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW),     "fcw", },
    4012     /* [kIemNativeGstReg_FpuFsw] = */                   { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW),     "fsw", },
    4013     /* [kIemNativeGstReg_SegBaseFirst + 0] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base),  "es_base", },
    4014     /* [kIemNativeGstReg_SegBaseFirst + 1] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base),  "cs_base", },
    4015     /* [kIemNativeGstReg_SegBaseFirst + 2] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base),  "ss_base", },
    4016     /* [kIemNativeGstReg_SegBaseFirst + 3] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base),  "ds_base", },
    4017     /* [kIemNativeGstReg_SegBaseFirst + 4] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base),  "fs_base", },
    4018     /* [kIemNativeGstReg_SegBaseFirst + 5] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base),  "gs_base", },
    4019     /* [kIemNativeGstReg_SegAttribFirst + 0] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u),   "es_attrib", },
    4020     /* [kIemNativeGstReg_SegAttribFirst + 1] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u),   "cs_attrib", },
    4021     /* [kIemNativeGstReg_SegAttribFirst + 2] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u),   "ss_attrib", },
    4022     /* [kIemNativeGstReg_SegAttribFirst + 3] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u),   "ds_attrib", },
    4023     /* [kIemNativeGstReg_SegAttribFirst + 4] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u),   "fs_attrib", },
    4024     /* [kIemNativeGstReg_SegAttribFirst + 5] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u),   "gs_attrib", },
    4025     /* [kIemNativeGstReg_SegLimitFirst + 0] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
    4026     /* [kIemNativeGstReg_SegLimitFirst + 1] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
    4027     /* [kIemNativeGstReg_SegLimitFirst + 2] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
    4028     /* [kIemNativeGstReg_SegLimitFirst + 3] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
    4029     /* [kIemNativeGstReg_SegLimitFirst + 4] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
    4030     /* [kIemNativeGstReg_SegLimitFirst + 5] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
    4031     /* [kIemNativeGstReg_SegSelFirst + 0] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel),      "es", },
    4032     /* [kIemNativeGstReg_SegSelFirst + 1] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel),      "cs", },
    4033     /* [kIemNativeGstReg_SegSelFirst + 2] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel),      "ss", },
    4034     /* [kIemNativeGstReg_SegSelFirst + 3] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel),      "ds", },
    4035     /* [kIemNativeGstReg_SegSelFirst + 4] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel),      "fs", },
    4036     /* [kIemNativeGstReg_SegSelFirst + 5] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel),      "gs", },
    4037     /* [kIemNativeGstReg_Cr4] = */                      { CPUMCTX_OFF_AND_SIZE(cr4),                "cr4", },
    4038     /* [kIemNativeGstReg_Xcr0] = */                     { CPUMCTX_OFF_AND_SIZE(aXcr[0]),            "xcr0", },
    4039     /* [kIemNativeGstReg_MxCsr] = */                    { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR),   "mxcsr", },
    4040     /* [kIemNativeGstReg_EFlags] = */                   { CPUMCTX_OFF_AND_SIZE(eflags),             "eflags", },
    4041 #undef CPUMCTX_OFF_AND_SIZE
    4042 };
    4043 AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
    4044 
    4045 
    4046 /** Host CPU general purpose register names. */
    4047 DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
    4048 {
    4049 #ifdef RT_ARCH_AMD64
    4050     "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
     4051 #elif defined(RT_ARCH_ARM64)
    4052     "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",  "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
    4053     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp",  "lr",  "sp/xzr",
    4054 #else
    4055 # error "port me"
    4056 #endif
    4057 };
    4058 
    4059 
    4060 #if 0 /* unused */
    4061 /**
    4062  * Tries to locate a suitable register in the given register mask.
    4063  *
    4064  * This ASSUMES the caller has done the minimal/optimal allocation checks and
    4065  * failed.
    4066  *
    4067  * @returns Host register number on success, returns UINT8_MAX on failure.
    4068  */
    4069 static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
    4070 {
    4071     Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
    4072     uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
    4073     if (fRegs)
    4074     {
     4075         /** @todo pick a better register here. */
    4076         unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
    4077 
    4078         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
    4079         Assert(   (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
    4080                == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    4081         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    4082 
    4083         pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    4084         pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    4085         pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    4086         return idxReg;
    4087     }
    4088     return UINT8_MAX;
    4089 }
    4090 #endif /* unused */
    4091 
    4092 
    4093 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    4094 /**
    4095  * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
    4096  *
     4097  * @returns New code buffer offset on success; throws VBox status code on failure.
     4098  * @param   pReNative   The native recompile state.
    4099  * @param   off         The current code buffer position.
    4100  * @param   enmGstReg   The guest register to store to.
    4101  * @param   idxHstReg   The host register to store from.
    4102  */
    4103 DECL_FORCE_INLINE_THROW(uint32_t)
    4104 iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
    4105 {
    4106     Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
    4107     Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
    4108 
    4109     switch (g_aGstShadowInfo[enmGstReg].cb)
    4110     {
    4111         case sizeof(uint64_t):
    4112             return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    4113         case sizeof(uint32_t):
    4114             return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    4115         case sizeof(uint16_t):
    4116             return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    4117 #if 0 /* not present in the table. */
    4118         case sizeof(uint8_t):
    4119             return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    4120 #endif
    4121         default:
    4122             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
    4123     }
    4124 }
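
          /* Example (based on the g_aGstShadowInfo table above): a dirty 'fcw' or
             'fsw' shadow (cb == sizeof(uint16_t)) takes the U16 emitter, segment
             limits and attributes (uint32_t) the U32 one, and GPRs, 'rip' and the
             like (uint64_t) the U64 one. */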
    4125 
    4126 
    4127 /**
    4128  * Emits code to flush a pending write of the given guest register if any.
    4129  *
    4130  * @returns New code buffer offset.
    4131  * @param   pReNative       The native recompile state.
    4132  * @param   off             Current code buffer position.
    4133  * @param   enmGstReg       The guest register to flush.
    4134  */
    4135 DECL_HIDDEN_THROW(uint32_t)
    4136 iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
    4137 {
    4138     uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
    4139 
    4140     Assert(enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast);
    4141     Assert(   idxHstReg != UINT8_MAX
    4142            && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
    4143     Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
    4144            g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
    4145 
    4146     off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
    4147 
    4148     pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
    4149     return off;
    4150 }
    4151 
    4152 
    4153 /**
    4154  * Flush the given set of guest registers if marked as dirty.
    4155  *
    4156  * @returns New code buffer offset.
    4157  * @param   pReNative       The native recompile state.
    4158  * @param   off             Current code buffer position.
    4159  * @param   fFlushGstReg    The guest register set to flush (default is flush everything).
    4160  */
    4161 DECL_HIDDEN_THROW(uint32_t)
    4162 iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
    4163 {
    4164     uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
    4165     if (bmGstRegShadowDirty)
    4166     {
    4167 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4168         iemNativeDbgInfoAddNativeOffset(pReNative, off);
    4169         iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
    4170 # endif
    4171         do
    4172         {
    4173             unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
    4174             bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
    4175             off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
    4176             Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
    4177         } while (bmGstRegShadowDirty);
    4178     }
    4179 
    4180     return off;
    4181 }
    4182 
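          /* Usage sketch (hypothetical call site): flushing just a dirty RSP shadow
             before code that pushes onto the stack might look like
                 off = iemNativeRegFlushDirtyGuest(pReNative, off,
                                                   RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP));
             while omitting the mask (the UINT64_MAX default) writes back every
             dirty shadow register. */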
    4183 
    4184 /**
    4185  * Flush all shadowed guest registers marked as dirty for the given host register.
    4186  *
    4187  * @returns New code buffer offset.
    4188  * @param   pReNative       The native recompile state.
    4189  * @param   off             Current code buffer position.
    4190  * @param   idxHstReg       The host register.
    4191  *
    4192  * @note This doesn't do any unshadowing of guest registers from the host register.
    4193  */
    4194 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
    4195 {
    4196     /* We need to flush any pending guest register writes this host register shadows. */
    4197     uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4198     if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
    4199     {
    4200 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4201         iemNativeDbgInfoAddNativeOffset(pReNative, off);
    4202         iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
    4203 # endif
    4204         /** @todo r=bird: This is a crap way of enumerating a bitmask where we're
    4205          *        likely to only have a single bit set. It'll be in the 0..15 range,
    4206          *        but still it's 15 unnecessary loops for the last guest register.  */
    4207 
    4208         uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
    4209         do
    4210         {
    4211             unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
    4212             bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
    4213             off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
    4214             Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
    4215         } while (bmGstRegShadowDirty);
    4216     }
    4217 
    4218     return off;
    4219 }
    4220 #endif
    4221 
    4222 
    4223 /**
    4224  * Locate a register, possibly freeing one up.
    4225  *
    4226  * This ASSUMES the caller has done the minimal/optimal allocation checks and
    4227  * failed.
    4228  *
     4229  * @returns Host register number on success.  Returns UINT8_MAX if no register
     4230  *          was found; the caller is expected to deal with this and raise an
     4231  *          allocation-type-specific status code (if desired).
     4232  *
     4233  * @throws  VBox status code if we run into trouble spilling a variable or
     4234  *          recording debug info.  Does NOT throw anything if we're out of
     4235  *          registers, though.
    4236  */
    4237 static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
    4238                                          uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
    4239 {
    4240     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
    4241     Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
    4242     Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
    4243 
    4244     /*
    4245      * Try a freed register that's shadowing a guest register.
    4246      */
    4247     uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
    4248     if (fRegs)
    4249     {
    4250         STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
    4251 
    4252 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
     4253          * When we have liveness information, we use it to kick out all shadowed
     4254          * guest registers that will not be needed any more in this TB.  If we're
    4255          * guest register that will not be needed any more in this TB.  If we're
    4256          * lucky, this may prevent us from ending up here again.
    4257          *
    4258          * Note! We must consider the previous entry here so we don't free
    4259          *       anything that the current threaded function requires (current
    4260          *       entry is produced by the next threaded function).
    4261          */
    4262         uint32_t const idxCurCall = pReNative->idxCurCall;
    4263         if (idxCurCall > 0)
    4264         {
    4265             PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
    4266 
    4267 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    4268             /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
    4269             AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
     4270             uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either the UNUSED or XCPT_OR_CALL state */
     4271 # else
     4272             /* Construct a mask of the registers not in the read or write state.
     4273                Note! We could skip writes, if they aren't from us, as this is just
     4274                      a hack to prevent trashing registers that have just been written
     4275                      or will be written when we retire the current instruction. */
     4276             uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
     4277                                  & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
     4278                                  & IEMLIVENESSBIT_MASK;
     4279 # endif
    4280             /* Merge EFLAGS. */
    4281             uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3);   /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
    4282             fTmp &= fTmp >> 2;                                  /*         CF3,Other3 = AF2,PF2 & CF2,Other2  */
    4283             fTmp &= fTmp >> 1;                                  /*             Other4 = CF3 & Other3 */
    4284             fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
    4285             fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
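            /* Worked example of the fold above (liveness bit order per the
               comments: Other,CF,PF,AF,ZF,SF,OF at kIemNativeGstReg_EFlags+0..6):
                   after '>> 3': bit+0 = Other&AF, +1 = CF&ZF, +2 = PF&SF, +3 = AF&OF
                   after '>> 2': bit+0 = Other&AF&PF&SF,  bit+1 = CF&ZF&AF&OF
                   after '>> 1': bit+0 = all seven liveness bits ANDed together
               so the EFLAGS register is only kicked out when every individual
               flag is freeable. */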
    4286 
    4287             /* If it matches any shadowed registers. */
    4288             if (pReNative->Core.bmGstRegShadows & fToFreeMask)
    4289             {
     4290 # ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
     4291                 /* Write back any dirty shadow registers we are about to unshadow. */
     4292                 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
     4293 # endif
    4294 
    4295                 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
    4296                 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
    4297                 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
    4298 
    4299                 /* See if we've got any unshadowed registers we can return now. */
    4300                 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
    4301                 if (fUnshadowedRegs)
    4302                 {
    4303                     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
    4304                     return (fPreferVolatile
    4305                             ? ASMBitFirstSetU32(fUnshadowedRegs)
    4306                             : ASMBitLastSetU32(  fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    4307                                                ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
    4308                          - 1;
    4309                 }
    4310             }
    4311         }
    4312 #endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
    4313 
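        /* Selection policy in a nutshell: when preferring volatile registers we
           take the lowest set bit of fRegs as-is (fRegMask already excludes the
           fixed registers); otherwise we take the highest set bit of
           fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, falling back on plain fRegs
           when that subset is empty. */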
    4314         unsigned const idxReg = (fPreferVolatile
    4315                                  ? ASMBitFirstSetU32(fRegs)
    4316                                  : ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    4317                                                     ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
    4318                               - 1;
    4319 
    4320         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
    4321         Assert(   (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
    4322                == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    4323         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    4324 
    4325 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    4326         /* We need to flush any pending guest register writes this host register shadows. */
    4327         *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
    4328 #endif
    4329 
    4330         pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    4331         pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    4332         pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    4333         return idxReg;
    4334     }
    4335 
    4336     /*
    4337      * Try free up a variable that's in a register.
    4338      *
     4339      * We do two rounds here, first evacuating variables that don't need to be
     4340      * saved on the stack, then in the second round moving things to the stack.
    4341      */
    4342     STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
    4343     for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
    4344     {
    4345         uint32_t fVars = pReNative->Core.bmVars;
    4346         while (fVars)
    4347         {
     4348             uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
                      fVars &= ~RT_BIT_32(idxVar); /* Clear the bit up front, so any 'continue' below won't loop forever on this variable. */
     4349             uint8_t const  idxReg = pReNative->Core.aVars[idxVar].idxReg;
     4350 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
     4351             if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
     4352                 continue;
     4353 #endif
    4354 
    4355             if (   idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
    4356                 && (RT_BIT_32(idxReg) & fRegMask)
    4357                 && (  iLoop == 0
    4358                     ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
    4359                     : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    4360                 && !pReNative->Core.aVars[idxVar].fRegAcquired)
    4361             {
    4362                 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
    4363                 Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
    4364                        == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    4365                 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4366                 Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
    4367                        == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
    4368 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    4369                 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
    4370 #endif
    4371 
    4372                 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    4373                 {
    4374                     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
    4375                     *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
    4376                 }
    4377 
    4378                 pReNative->Core.aVars[idxVar].idxReg    = UINT8_MAX;
    4379                 pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxReg);
    4380 
    4381                 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    4382                 pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    4383                 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    4384                 return idxReg;
    4385             }
    4387         }
    4388     }
    4389 
    4390     return UINT8_MAX;
    4391 }
    4392 
    4393 
    4394 /**
    4395  * Reassigns a variable to a different register specified by the caller.
    4396  *
    4397  * @returns The new code buffer position.
    4398  * @param   pReNative       The native recompile state.
    4399  * @param   off             The current code buffer position.
    4400  * @param   idxVar          The variable index.
    4401  * @param   idxRegOld       The old host register number.
    4402  * @param   idxRegNew       The new host register number.
    4403  * @param   pszCaller       The caller for logging.
    4404  */
    4405 static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
    4406                                     uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
    4407 {
    4408     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    4409     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
    4410 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    4411     Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
    4412 #endif
    4413     RT_NOREF(pszCaller);
    4414 
    4415 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    4416     Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
    4417 #endif
    4418     iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
    4419 
    4420     uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
    4421 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    4422     Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
    4423 #endif
    4424     Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
    4425            pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
    4426     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
    4427 
    4428     pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
    4429     pReNative->Core.aHstRegs[idxRegNew].enmWhat        = kIemNativeWhat_Var;
    4430     pReNative->Core.aHstRegs[idxRegNew].idxVar         = idxVar;
    4431     if (fGstRegShadows)
    4432     {
    4433         pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
    4434                                                | RT_BIT_32(idxRegNew);
    4435         while (fGstRegShadows)
    4436         {
    4437             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    4438             fGstRegShadows &= ~RT_BIT_64(idxGstReg);
    4439 
    4440             Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
    4441             pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
    4442         }
    4443     }
    4444 
    4445     pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
    4446     pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
    4447     pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
    4448     return off;
    4449 }
    4450 
    4451 
    4452 /**
    4453  * Moves a variable to a different register or spills it onto the stack.
    4454  *
    4455  * This must be a stack variable (kIemNativeVarKind_Stack) because the other
    4456  * kinds can easily be recreated if needed later.
    4457  *
    4458  * @returns The new code buffer position.
    4459  * @param   pReNative       The native recompile state.
    4460  * @param   off             The current code buffer position.
    4461  * @param   idxVar          The variable index.
    4462  * @param   fForbiddenRegs  Mask of the forbidden registers.  Defaults to
    4463  *                          call-volatile registers.
    4464  */
    4465 DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
    4466                                                             uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
    4467 {
    4468     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    4469     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    4470     Assert(pVar->enmKind == kIemNativeVarKind_Stack);
    4471     Assert(!pVar->fRegAcquired);
    4472 
    4473     uint8_t const idxRegOld = pVar->idxReg;
    4474     Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4475     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
    4476     Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
    4477     Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
    4478            == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
    4479     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4480     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
    4481            == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
    4482 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    4483     Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
    4484 #endif
    4485 
    4486 
     4487     /** @todo Add statistics on this. */
     4488     /** @todo Implement basic variable liveness analysis (python) so variables
     4489      * can be freed immediately once they are no longer used.  Without that we
     4490      * risk trashing registers and stack for dead variables.
     4491      * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
    4492 
    4493     /*
    4494      * First try move it to a different register, as that's cheaper.
    4495      */
    4496     fForbiddenRegs |= RT_BIT_32(idxRegOld);
    4497     fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
    4498     uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
    4499     if (fRegs)
    4500     {
    4501         /* Avoid using shadow registers, if possible. */
    4502         if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
    4503             fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
    4504         unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
    4505         return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
    4506     }
    4507 
    4508     /*
    4509      * Otherwise we must spill the register onto the stack.
    4510      */
    4511     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    4512     Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
    4513            idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    4514     off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
    4515 
    4516     pVar->idxReg                            = UINT8_MAX;
    4517     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
    4518     pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxRegOld);
    4519     pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
    4520     pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
    4521     return off;
    4522 }
    4523 
    4524 
    4525 /**
    4526  * Allocates a temporary host general purpose register.
    4527  *
    4528  * This may emit code to save register content onto the stack in order to free
    4529  * up a register.
    4530  *
    4531  * @returns The host register number; throws VBox status code on failure,
    4532  *          so no need to check the return value.
    4533  * @param   pReNative       The native recompile state.
    4534  * @param   poff            Pointer to the variable with the code buffer position.
     4535  *                          This will be updated if we need to move a variable from
    4536  *                          register to stack in order to satisfy the request.
    4537  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    4538  *                          registers (@c true, default) or the other way around
    4539  *                          (@c false, for iemNativeRegAllocTmpForGuestReg()).
    4540  */
    4541 DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
    4542 {
    4543     /*
    4544      * Try find a completely unused register, preferably a call-volatile one.
    4545      */
    4546     uint8_t  idxReg;
    4547     uint32_t fRegs = ~pReNative->Core.bmHstRegs
    4548                    & ~pReNative->Core.bmHstRegsWithGstShadow
    4549                    & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
    4550     if (fRegs)
    4551     {
    4552         if (fPreferVolatile)
    4553             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
    4554                                                 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    4555         else
    4556             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    4557                                                 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    4558         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    4559         Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    4560         Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
    4561     }
    4562     else
    4563     {
    4564         idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
    4565         AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
    4566         Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
    4567     }
    4568     return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
    4569 }
    4570 
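          /* Typical usage sketch (hypothetical emitter fragment, for illustration):
                 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
                 off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, uSomeImmValue);
                 ...
                 iemNativeRegFreeTmp(pReNative, idxTmpReg);
             Note that iemNativeRegFreeTmp (see below) only clears the allocation
             bit; any guest shadow copies remain associated with the register. */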
    4571 
    4572 /**
     4573  * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
     4574  * registers.
    4575  *
    4576  * @returns The host register number; throws VBox status code on failure,
    4577  *          so no need to check the return value.
    4578  * @param   pReNative       The native recompile state.
    4579  * @param   poff            Pointer to the variable with the code buffer position.
     4580  *                          This will be updated if we need to move a variable from
    4581  *                          register to stack in order to satisfy the request.
    4582  * @param   fRegMask        Mask of acceptable registers.
    4583  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    4584  *                          registers (@c true, default) or the other way around
    4585  *                          (@c false, for iemNativeRegAllocTmpForGuestReg()).
    4586  */
    4587 DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
    4588                                                   bool fPreferVolatile /*= true*/)
    4589 {
    4590     Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
    4591     Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
    4592 
    4593     /*
    4594      * Try find a completely unused register, preferably a call-volatile one.
    4595      */
    4596     uint8_t  idxReg;
    4597     uint32_t fRegs = ~pReNative->Core.bmHstRegs
    4598                    & ~pReNative->Core.bmHstRegsWithGstShadow
    4599                    & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
    4600                    & fRegMask;
    4601     if (fRegs)
    4602     {
    4603         if (fPreferVolatile)
    4604             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
    4605                                                 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    4606         else
    4607             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    4608                                                 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    4609         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    4610         Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    4611         Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
    4612     }
    4613     else
    4614     {
    4615         idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
    4616         AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
    4617         Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
    4618     }
    4619     return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
    4620 }
    4621 
    4622 
    4623 /**
    4624  * Allocates a temporary register for loading an immediate value into.
    4625  *
    4626  * This will emit code to load the immediate, unless there happens to be an
    4627  * unused register with the value already loaded.
    4628  *
    4629  * The caller will not modify the returned register, it must be considered
    4630  * read-only.  Free using iemNativeRegFreeTmpImm.
    4631  *
    4632  * @returns The host register number; throws VBox status code on failure, so no
    4633  *          need to check the return value.
    4634  * @param   pReNative       The native recompile state.
    4635  * @param   poff            Pointer to the variable with the code buffer position.
    4636  * @param   uImm            The immediate value that the register must hold upon
    4637  *                          return.
    4638  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    4639  *                          registers (@c true, default) or the other way around
    4640  *                          (@c false).
    4641  *
    4642  * @note    Reusing immediate values has not been implemented yet.
    4643  */
    4644 DECL_HIDDEN_THROW(uint8_t)
    4645 iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
    4646 {
    4647     uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
    4648     *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
    4649     return idxReg;
    4650 }
    4651 
    4652 
    4653 /**
    4654  * Allocates a temporary host general purpose register for keeping a guest
    4655  * register value.
    4656  *
    4657  * Since we may already have a register holding the guest register value,
    4658  * code will be emitted to do the loading if that's not the case. Code may also
     4659  * be emitted if we have to free up a register to satisfy the request.
    4660  *
    4661  * @returns The host register number; throws VBox status code on failure, so no
    4662  *          need to check the return value.
    4663  * @param   pReNative       The native recompile state.
    4664  * @param   poff            Pointer to the variable with the code buffer
     4665  *                          position. This will be updated if we need to move a
    4666  *                          variable from register to stack in order to satisfy
    4667  *                          the request.
     4668  * @param   enmGstReg       The guest register that is to be updated.
    4669  * @param   enmIntendedUse  How the caller will be using the host register.
    4670  * @param   fNoVolatileRegs Set if no volatile register allowed, clear if any
    4671  *                          register is okay (default).  The ASSUMPTION here is
    4672  *                          that the caller has already flushed all volatile
    4673  *                          registers, so this is only applied if we allocate a
    4674  *                          new register.
    4675  * @param   fSkipLivenessAssert     Hack for liveness input validation of EFLAGS.
    4676  * @sa      iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
    4677  */
    4678 DECL_HIDDEN_THROW(uint8_t)
    4679 iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
    4680                                 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
    4681                                 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
    4682 {
    4683     Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
    4684 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    4685     AssertMsg(   fSkipLivenessAssert
    4686               || pReNative->idxCurCall == 0
    4687               || enmGstReg == kIemNativeGstReg_Pc
    4688               || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
    4689                   ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
    4690                   : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
    4691                   ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
    4692                   : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(  iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
    4693               ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
    4694 #endif
    4695     RT_NOREF(fSkipLivenessAssert);
    4696 #if defined(LOG_ENABLED) || defined(VBOX_STRICT)
    4697     static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
    4698 #endif
    4699     uint32_t const fRegMask = !fNoVolatileRegs
    4700                             ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
    4701                             : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    4702 
    4703     /*
    4704      * First check if the guest register value is already in a host register.
    4705      */
    4706     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    4707     {
    4708         uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
    4709         Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4710         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
    4711         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    4712 
    4713         /* It's not supposed to be allocated... */
    4714         if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
    4715         {
    4716             /*
    4717              * If the register will trash the guest shadow copy, try find a
    4718              * completely unused register we can use instead.  If that fails,
    4719              * we need to disassociate the host reg from the guest reg.
    4720              */
    4721             /** @todo would be nice to know if preserving the register is in any way helpful. */
    4722             /* If the purpose is calculations, try duplicate the register value as
    4723                we'll be clobbering the shadow. */
    4724             if (   enmIntendedUse == kIemNativeGstRegUse_Calculation
    4725                 && (  ~pReNative->Core.bmHstRegs
    4726                     & ~pReNative->Core.bmHstRegsWithGstShadow
    4727                     & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
    4728             {
    4729                 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
    4730 
    4731                 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
    4732 
    4733                 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
    4734                        g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4735                        g_apszIemNativeHstRegNames[idxRegNew]));
    4736                 idxReg = idxRegNew;
    4737             }
    4738             /* If the current register matches the restrictions, go ahead and allocate
    4739                it for the caller. */
    4740             else if (fRegMask & RT_BIT_32(idxReg))
    4741             {
    4742                 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
    4743                 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
    4744                 pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    4745                 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    4746                     Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
    4747                            g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    4748                 else
    4749                 {
    4750                     iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    4751                     Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
    4752                            g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
    4753                 }
    4754             }
    4755             /* Otherwise, allocate a register that satisfies the caller and transfer
    4756                the shadowing if compatible with the intended use.  (This basically
     4757                means the caller wants a non-volatile register (RSP push/pop scenario).) */
    4758             else
    4759             {
    4760                 Assert(fNoVolatileRegs);
    4761                 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
    4762                                                                     !fNoVolatileRegs
    4763                                                                  && enmIntendedUse == kIemNativeGstRegUse_Calculation);
    4764                 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
    4765                 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    4766                 {
    4767                     iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
     4768                     Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
    4769                            g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
    4770                            g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    4771                 }
    4772                 else
    4773                     Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
    4774                            g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4775                            g_apszIemNativeHstRegNames[idxRegNew]));
    4776                 idxReg = idxRegNew;
    4777             }
    4778         }
    4779         else
    4780         {
    4781             /*
    4782              * Oops. Shadowed guest register already allocated!
    4783              *
     4784              * Allocate a new register, copy the value over and, if updating,
     4785              * transfer the guest shadow copy assignment to the new register.
    4786              */
    4787             AssertMsg(   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
    4788                       && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
    4789                       ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
    4790                        idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
    4791 
    4792             /** @todo share register for readonly access. */
    4793             uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
    4794                                                              enmIntendedUse == kIemNativeGstRegUse_Calculation);
    4795 
    4796             if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    4797                 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
    4798 
    4799             if (   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
    4800                 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    4801                 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
    4802                        g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4803                        g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
    4804             else
    4805             {
    4806                 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
    4807                 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
    4808                        g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4809                        g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
    4810             }
    4811             idxReg = idxRegNew;
    4812         }
    4813         Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
    4814 
    4815 #ifdef VBOX_STRICT
    4816         /* Strict builds: Check that the value is correct. */
    4817         *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
    4818 #endif
    4819 
    4820 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    4821         /** @todo r=aeichner Implement for registers other than GPR as well. */
    4822         if (   (   enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
    4823                 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
    4824             && enmGstReg >= kIemNativeGstReg_GprFirst
    4825             && enmGstReg <= kIemNativeGstReg_GprLast
    4826             )
    4827         {
    4828 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4829             iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
    4830             iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
    4831 # endif
    4832             pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
    4833         }
    4834 #endif
    4835 
    4836         return idxReg;
    4837     }
    4838 
    4839     /*
     4840      * Allocate a new register, load it with the guest value and designate it as a shadow copy of the guest register.
    4841      */
    4842     uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
    4843 
    4844     if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    4845         *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
    4846 
    4847     if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    4848         iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
    4849     Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
    4850            g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    4851 
    4852 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    4853     /** @todo r=aeichner Implement for registers other than GPR as well. */
    4854     if (   (   enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
    4855             || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
    4856         && enmGstReg >= kIemNativeGstReg_GprFirst
    4857         && enmGstReg <= kIemNativeGstReg_GprLast
    4858         )
    4859     {
    4860 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    4861         iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
    4862         iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
    4863 # endif
    4864         pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
    4865     }
    4866 #endif
    4867 
    4868     return idxRegNew;
    4869 }
    4870 
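          /* Informal decision summary for iemNativeRegAllocTmpForGuestReg, derived
             from the code above:
                 1. Guest reg already shadowed, host reg not allocated:
                      a) calculation use + spare reg available: duplicate the value
                         into a new register;
                      b) host reg satisfies fRegMask: reuse it directly;
                      c) otherwise: allocate a new reg and transfer/duplicate the
                         shadowing.
                 2. Guest reg shadowed but host reg already allocated: allocate a
                    new reg, copy the value, and move the shadowing when updating
                    or writing.
                 3. No shadow yet: allocate a new reg, load it from CPUMCTX unless
                    this is a full write, and mark it as the shadow copy unless
                    this is a destructive calculation.
             With delayed writeback enabled, ForUpdate/ForFullWrite on GPRs also
             marks the shadow copy dirty. */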
    4871 
    4872 /**
    4873  * Allocates a temporary host general purpose register that already holds the
    4874  * given guest register value.
    4875  *
    4876  * The use case for this function is places where the shadowing state cannot be
     4877  * modified due to branching and such.  This will fail if we don't have a
    4878  * current shadow copy handy or if it's incompatible.  The only code that will
    4879  * be emitted here is value checking code in strict builds.
    4880  *
    4881  * The intended use can only be readonly!
    4882  *
    4883  * @returns The host register number, UINT8_MAX if not present.
    4884  * @param   pReNative       The native recompile state.
    4885  * @param   poff            Pointer to the instruction buffer offset.
    4886  *                          Will be updated in strict builds if a register is
    4887  *                          found.
     4888  * @param   enmGstReg       The guest register whose value is wanted.
    4889  * @note    In strict builds, this may throw instruction buffer growth failures.
    4890  *          Non-strict builds will not throw anything.
    4891  * @sa iemNativeRegAllocTmpForGuestReg
    4892  */
    4893 DECL_HIDDEN_THROW(uint8_t)
    4894 iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
    4895 {
    4896     Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
    4897 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    4898     AssertMsg(   pReNative->idxCurCall == 0
    4899               || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
    4900               || enmGstReg == kIemNativeGstReg_Pc,
    4901               ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
    4902 #endif
    4903 
    4904     /*
    4905      * First check if the guest register value is already in a host register.
    4906      */
    4907     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    4908     {
    4909         uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
    4910         Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4911         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
    4912         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    4913 
    4914         if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
    4915         {
    4916             /*
    4917              * We only do readonly use here, so easy compared to the other
    4918              * variant of this code.
    4919              */
    4920             pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
    4921             pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
    4922             pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    4923             Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
    4924                    g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
    4925 
    4926 #ifdef VBOX_STRICT
    4927             /* Strict builds: Check that the value is correct. */
    4928             *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
    4929 #else
    4930             RT_NOREF(poff);
    4931 #endif
    4932             return idxReg;
    4933         }
    4934     }
    4935 
    4936     return UINT8_MAX;
    4937 }
    4938 
    4939 
    4940 /**
    4941  * Allocates argument registers for a function call.
    4942  *
    4943  * @returns New code buffer offset on success; throws VBox status code on failure, so no
    4944  *          need to check the return value.
    4945  * @param   pReNative   The native recompile state.
    4946  * @param   off         The current code buffer offset.
    4947  * @param   cArgs       The number of arguments the function call takes.
    4948  */
    4949 DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
    4950 {
    4951     AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
    4952                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
    4953     Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
    4954     Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
    4955 
    4956     if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
    4957         cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
    4958     else if (cArgs == 0)
     4959         return off;
    4960 
    4961     /*
     4962      * Do we get lucky and all registers are free and not shadowing anything?
    4963      */
    4964     if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
    4965         for (uint32_t i = 0; i < cArgs; i++)
    4966         {
    4967             uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
    4968             pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
    4969             pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    4970             Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    4971         }
    4972     /*
    4973      * Okay, not lucky so we have to free up the registers.
    4974      */
    4975     else
    4976         for (uint32_t i = 0; i < cArgs; i++)
    4977         {
    4978             uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
    4979             if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
    4980             {
    4981                 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
    4982                 {
    4983                     case kIemNativeWhat_Var:
    4984                     {
    4985                         uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
    4986                         IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    4987                         AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
    4988                                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
    4989                         Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
    4990 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    4991                         Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
    4992 #endif
    4993 
    4994                         if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
    4995                             pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
    4996                         else
    4997                         {
    4998                             off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
    4999                             Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    5000                         }
    5001                         break;
    5002                     }
    5003 
    5004                     case kIemNativeWhat_Tmp:
    5005                     case kIemNativeWhat_Arg:
    5006                     case kIemNativeWhat_rc:
    5007                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
    5008                     default:
    5009                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
    5010                 }
    5011 
    5012             }
    5013             if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
    5014             {
    5015                 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
    5016                 Assert(   (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
    5017                        == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    5018 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    5019                 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
    5020 #endif
    5021                 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    5022                 pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    5023                 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    5024             }
    5025             else
    5026                 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    5027             pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
    5028             pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    5029         }
    5030     pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
    5031     return true;
    5032 }
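
/*
 * Illustrative sketch (not from the original sources): how the two companion
 * tables consulted above fit together.  g_afIemNativeCallRegs[cArgs] is the
 * combined mask of the first cArgs host argument registers, while
 * g_aidxIemNativeCallRegs maps an argument number to its host register index,
 * so the fast path above is a single AND against the two allocator bitmaps.
 * The argument count below is made up for the example.
 */
#if 0
static bool iemNativeExampleArgRegsAreFree(PIEMRECOMPILERSTATE pReNative)
{
    uint32_t const fArgRegs = g_afIemNativeCallRegs[3]; /* e.g. a three argument call */
    /* The registers are immediately claimable if neither allocated nor holding guest shadows: */
    return ((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & fArgRegs) == 0;
}
#endif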
    5033 
    5034 
    5035 DECL_HIDDEN_THROW(uint8_t)  iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
    5036 
    5037 
    5038 #if 0
    5039 /**
    5040  * Frees a register assignment of any type.
    5041  *
    5042  * @param   pReNative       The native recompile state.
    5043  * @param   idxHstReg       The register to free.
    5044  *
    5045  * @note    Does not update variables.
    5046  */
    5047 DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
    5048 {
    5049     Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    5050     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
    5051     Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
    5052     Assert(   pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
    5053            || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
    5054            || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
    5055            || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
    5056     Assert(   pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
    5057            || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
    5058            || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
    5059     Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    5060            == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    5061     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
    5062            == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    5063 
    5064     pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxHstReg);
    5065     /* no flushing, right:
    5066     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    5067     pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    5068     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    5069     */
    5070 }
    5071 #endif
    5072 
    5073 
    5074 /**
    5075  * Frees a temporary register.
    5076  *
    5077  * Any shadow copies of guest registers assigned to the host register will not
    5078  * be flushed by this operation.
    5079  */
    5080 DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
    5081 {
    5082     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
    5083     Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
    5084     pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    5085     Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
    5086            g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    5087 }
    5088 
    5089 
    5090 /**
    5091  * Frees a temporary immediate register.
    5092  *
    5093  * It is assumed that the call has not modified the register, so it still holds
    5094  * the same value as when it was allocated via iemNativeRegAllocTmpImm().
    5095  */
    5096 DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
    5097 {
    5098     iemNativeRegFreeTmp(pReNative, idxHstReg);
    5099 }
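
/*
 * Illustrative sketch (not from the original sources): the intended pairing of
 * iemNativeRegAllocTmpImm (referenced by the comment above; its exact signature
 * is assumed here) with the free helper.  The immediate value is made up.
 */
#if 0
static uint32_t iemNativeExampleUseTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xfeedface)); /* assumed signature */
    /* ... emit code that reads idxRegImm but never writes it ... */
    iemNativeRegFreeTmpImm(pReNative, idxRegImm); /* only valid because the value was left unmodified */
    return off;
}
#endif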
    5100 
    5101 
    5102 /**
    5103  * Frees a register assigned to a variable.
    5104  *
    5105  * The register will be disassociated from the variable.
    5106  */
    5107 DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
    5108 {
    5109     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
    5110     Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
    5111     uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
    5112     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    5113     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
    5114 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    5115     Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
    5116 #endif
    5117 
    5118     pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
    5119     pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    5120     if (!fFlushShadows)
    5121         Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
    5122                g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
    5123     else
    5124     {
    5125         pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    5126         uint64_t const fGstRegShadowsOld        = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    5127 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    5128         Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
    5129 #endif
    5130         pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    5131         pReNative->Core.bmGstRegShadows        &= ~fGstRegShadowsOld;
    5132         uint64_t       fGstRegShadows           = fGstRegShadowsOld;
    5133         while (fGstRegShadows)
    5134         {
    5135             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    5136             fGstRegShadows &= ~RT_BIT_64(idxGstReg);
    5137 
    5138             Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
    5139             pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
    5140         }
    5141         Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
    5142                g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
    5143     }
    5144 }
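
/*
 * Illustrative sketch (not from the original sources): the shadowing invariant
 * the helpers above maintain.  The union of the per-host-register shadow masks
 * must equal bmGstRegShadows, and the reverse index (aidxGstRegShadows) must
 * point back at the owning host register.
 */
#if 0
static void iemNativeExampleCheckShadowInvariants(PIEMRECOMPILERSTATE pReNative)
{
    uint64_t fUnion = 0;
    for (uint8_t idxHstReg = 0; idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs); idxHstReg++)
    {
        uint64_t fShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
        fUnion |= fShadows;
        while (fShadows)
        {
            unsigned const idxGstReg = ASMBitFirstSetU64(fShadows) - 1;
            fShadows &= ~RT_BIT_64(idxGstReg);
            Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
        }
    }
    Assert(fUnion == pReNative->Core.bmGstRegShadows);
}
#endif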
    5145 
    5146 
    5147 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    5148 # ifdef LOG_ENABLED
    5149 /** Host CPU SIMD register names. */
    5150 DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
    5151 {
    5152 #  ifdef RT_ARCH_AMD64
    5153     "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
    5154 #  elif defined(RT_ARCH_ARM64)
    5155     "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",  "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    5156     "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
    5157 #  else
    5158 #   error "port me"
    5159 #  endif
    5160 };
    5161 # endif
    5162 
    5163 
    5164 /**
    5165  * Frees a SIMD register assigned to a variable.
    5166  *
    5167  * The register will be disassociated from the variable.
    5168  */
    5169 DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
    5170 {
    5171     Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
    5172     Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
    5173     uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
    5174     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    5175     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
    5176     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
    5177 
    5178     pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
    5179     pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
    5180     if (!fFlushShadows)
    5181         Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
    5182                g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
    5183     else
    5184     {
    5185         pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    5186         uint64_t const fGstRegShadowsOld        = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
    5187         pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
    5188         pReNative->Core.bmGstSimdRegShadows    &= ~fGstRegShadowsOld;
    5189         uint64_t       fGstRegShadows           = fGstRegShadowsOld;
    5190         while (fGstRegShadows)
    5191         {
    5192             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    5193             fGstRegShadows &= ~RT_BIT_64(idxGstReg);
    5194 
    5195             Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
    5196             pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
    5197         }
    5198         Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
    5199                g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
    5200     }
    5201 }
    5202 
    5203 
    5204 /**
    5205  * Reassigns a variable to a different SIMD register specified by the caller.
    5206  *
    5207  * @returns The new code buffer position.
    5208  * @param   pReNative       The native recompile state.
    5209  * @param   off             The current code buffer position.
    5210  * @param   idxVar          The variable index.
    5211  * @param   idxRegOld       The old host register number.
    5212  * @param   idxRegNew       The new host register number.
    5213  * @param   pszCaller       The caller for logging.
    5214  */
    5215 static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
    5216                                         uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
    5217 {
    5218     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    5219     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
    5220     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
    5221     RT_NOREF(pszCaller);
    5222 
    5223     Assert(!(  (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
    5224              & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
    5225     iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
    5226 
    5227     uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
    5228     Assert(!(  (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
    5229              & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
    5230 
    5231     Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
    5232            pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
    5234 
    5235     if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
    5236         off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
    5237     else
    5238     {
    5239         Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
    5240         off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
    5241     }
    5242 
    5243     pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
    5244     pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat        = kIemNativeWhat_Var;
    5245     pReNative->Core.aHstSimdRegs[idxRegNew].idxVar         = idxVar;
    5246     if (fGstRegShadows)
    5247     {
    5248         pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
    5249                                                    | RT_BIT_32(idxRegNew);
    5250         while (fGstRegShadows)
    5251         {
    5252             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    5253             fGstRegShadows &= ~RT_BIT_64(idxGstReg);
    5254 
    5255             Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
    5256             pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
    5257         }
    5258     }
    5259 
    5260     pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
    5261     pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
    5262     pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
    5263     return off;
    5264 }
    5265 
    5266 
    5267 /**
    5268  * Moves a variable to a different register or spills it onto the stack.
    5269  *
    5270  * This must be a stack variable (kIemNativeVarKind_Stack) because the other
    5271  * kinds can easily be recreated if needed later.
    5272  *
    5273  * @returns The new code buffer position.
    5274  * @param   pReNative       The native recompile state.
    5275  * @param   off             The current code buffer position.
    5276  * @param   idxVar          The variable index.
    5277  * @param   fForbiddenRegs  Mask of the forbidden registers.  Defaults to
    5278  *                          call-volatile registers.
    5279  */
    5280 DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
    5281                                                                 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
    5282 {
    5283     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    5284     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    5285     Assert(pVar->enmKind == kIemNativeVarKind_Stack);
    5286     Assert(!pVar->fRegAcquired);
    5287     Assert(pVar->fSimdReg);
    5288 
    5289     uint8_t const idxRegOld = pVar->idxReg;
    5290     Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
    5291     Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
    5292     Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
    5293     Assert(   (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
    5294            == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
    5295     Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
    5296     Assert(   RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
    5297            == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
    5298     Assert(!(  (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
    5299              & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
    5300 
    5301     /** @todo Add statistics on this. */
    5302     /** @todo Implement basic variable liveness analysis (python) so variables
    5303      * can be freed immediately once they are no longer used.  Otherwise we risk
    5304      * trashing registers and stack slots on dead variables.
    5305      * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
    5306 
    5307     /*
    5308      * First try move it to a different register, as that's cheaper.
    5309      */
    5310     fForbiddenRegs |= RT_BIT_32(idxRegOld);
    5311     fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
    5312     uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
    5313     if (fRegs)
    5314     {
    5315         /* Avoid using shadow registers, if possible. */
    5316         if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
    5317             fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
    5318         unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
    5319         return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
    5320     }
    5321 
    5322     /*
    5323      * Otherwise we must spill the register onto the stack.
    5324      */
    5325     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    5326     Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
    5327            idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    5328 
    5329     if (pVar->cbVar == sizeof(RTUINT128U))
    5330         off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
    5331     else
    5332     {
    5333         Assert(pVar->cbVar == sizeof(RTUINT256U));
    5334         off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
    5335     }
    5336 
    5337     pVar->idxReg                                = UINT8_MAX;
    5338     pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
    5339     pReNative->Core.bmHstSimdRegs              &= ~RT_BIT_32(idxRegOld);
    5340     pReNative->Core.bmGstSimdRegShadows        &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
    5341     pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
    5342     return off;
    5343 }
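
/*
 * Illustrative sketch (not from the original sources): the spill path above in
 * isolation -- resolve the variable to a stack slot, compute the frame-pointer
 * relative displacement, and store the 128-bit register there.  The return
 * type of iemNativeStackCalcBpDisp is assumed; the arguments are made up.
 */
#if 0
static uint32_t iemNativeExampleSpillU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstSimdReg)
{
    uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    int32_t const offBpDisp    = iemNativeStackCalcBpDisp(idxStackSlot); /* assumed return type */
    return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxHstSimdReg);
}
#endif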
    5344 
    5345 
    5346 /**
    5347  * Called right before emitting a call instruction to move anything important
    5348  * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
    5349  * optionally freeing argument variables.
    5350  *
    5351  * @returns New code buffer offset, UINT32_MAX on failure.
    5352  * @param   pReNative       The native recompile state.
    5353  * @param   off             The code buffer offset.
    5354  * @param   cArgs           The number of arguments the function call takes.
    5355  *                          It is presumed that the host register part of these has
    5356  *                          been allocated as such already and won't need moving,
    5357  *                          just freeing.
    5358  * @param   fKeepVars       Mask of variables that should keep their register
    5359  *                          assignments.  Caller must take care to handle these.
    5360  */
    5361 DECL_HIDDEN_THROW(uint32_t)
    5362 iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
    5363 {
    5364     Assert(!cArgs); RT_NOREF(cArgs);
    5365 
    5366     /* fKeepVars will reduce this mask. */
    5367     uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
    5368 
    5369     /*
    5370      * Move anything important out of volatile registers.
    5371      */
    5372     uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
    5373 #ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
    5374                              & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
    5375 #endif
    5376                              ;
    5377 
    5378     fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
    5379     if (!fSimdRegsToMove)
    5380     { /* likely */ }
    5381     else
    5382     {
    5383         Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
    5384         while (fSimdRegsToMove != 0)
    5385         {
    5386             unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
    5387             fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
    5388 
    5389             switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
    5390             {
    5391                 case kIemNativeWhat_Var:
    5392                 {
    5393                     uint8_t const       idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
    5394                     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    5395                     PIEMNATIVEVAR const pVar   = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    5396                     Assert(pVar->idxReg == idxSimdReg);
    5397                     Assert(pVar->fSimdReg);
    5398                     if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
    5399                     {
    5400                         Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
    5401                                idxVar, pVar->enmKind, pVar->idxReg));
    5402                         if (pVar->enmKind != kIemNativeVarKind_Stack)
    5403                             pVar->idxReg = UINT8_MAX;
    5404                         else
    5405                             off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
    5406                     }
    5407                     else
    5408                         fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
    5409                     continue;
    5410                 }
    5411 
    5412                 case kIemNativeWhat_Arg:
    5413                     AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
    5414                     continue;
    5415 
    5416                 case kIemNativeWhat_rc:
    5417                 case kIemNativeWhat_Tmp:
    5418                     AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
    5419                     continue;
    5420 
    5421                 case kIemNativeWhat_FixedReserved:
    5422 #ifdef RT_ARCH_ARM64
    5423                     continue; /* On ARM the upper half of the virtual 256-bit register. */
    5424 #endif
    5425 
    5426                 case kIemNativeWhat_FixedTmp:
    5427                 case kIemNativeWhat_pVCpuFixed:
    5428                 case kIemNativeWhat_pCtxFixed:
    5429                 case kIemNativeWhat_PcShadow:
    5430                 case kIemNativeWhat_Invalid:
    5431                 case kIemNativeWhat_End:
    5432                     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
    5433             }
    5434             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
    5435         }
    5436     }
    5437 
    5438     /*
    5439      * Do the actual freeing.
    5440      */
    5441     if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
    5442         Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
    5443                pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
    5444     pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
    5445 
    5446     /* If there are guest register shadows in any call-volatile register, we
    5447        have to clear the corresponding guest register masks for each register. */
    5448     uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
    5449     if (fHstSimdRegsWithGstShadow)
    5450     {
    5451         Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
    5452                pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
    5453         pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
    5454         do
    5455         {
    5456             unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
    5457             fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
    5458 
    5459             AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
    5460 
    5461 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    5462             /*
    5463              * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
    5464              * to call volatile registers).
    5465              */
    5466             if (  (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
    5467                 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
    5468                 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
    5469 #endif
    5470             Assert(!(  (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
    5471                      & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
    5472 
    5473             pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
    5474             pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
    5475         } while (fHstSimdRegsWithGstShadow != 0);
    5476     }
    5477 
    5478     return off;
    5479 }
    5480 #endif
    5481 
    5482 
    5483 /**
    5484  * Called right before emitting a call instruction to move anything important
    5485  * out of call-volatile registers, free and flush the call-volatile registers,
    5486  * optionally freeing argument variables.
    5487  *
    5488  * @returns New code buffer offset, UINT32_MAX on failure.
    5489  * @param   pReNative       The native recompile state.
    5490  * @param   off             The code buffer offset.
    5491  * @param   cArgs           The number of arguments the function call takes.
    5492  *                          It is presumed that the host register part of these has
    5493  *                          been allocated as such already and won't need moving,
    5494  *                          just freeing.
    5495  * @param   fKeepVars       Mask of variables that should keep their register
    5496  *                          assignments.  Caller must take care to handle these.
    5497  */
    5498 DECL_HIDDEN_THROW(uint32_t)
    5499 iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
    5500 {
    5501     Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
    5502 
    5503     /* fKeepVars will reduce this mask. */
    5504     uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    5505 
    5506     /*
    5507      * Move anything important out of volatile registers.
    5508      */
    5509     if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
    5510         cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
    5511     uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
    5512 #ifdef IEMNATIVE_REG_FIXED_TMP0
    5513                          & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
    5514 #endif
    5515 #ifdef IEMNATIVE_REG_FIXED_TMP1
    5516                          & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
    5517 #endif
    5518 #ifdef IEMNATIVE_REG_FIXED_PC_DBG
    5519                          & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
    5520 #endif
    5521                          & ~g_afIemNativeCallRegs[cArgs];
    5522 
    5523     fRegsToMove &= pReNative->Core.bmHstRegs;
    5524     if (!fRegsToMove)
    5525     { /* likely */ }
    5526     else
    5527     {
    5528         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
    5529         while (fRegsToMove != 0)
    5530         {
    5531             unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
    5532             fRegsToMove &= ~RT_BIT_32(idxReg);
    5533 
    5534             switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
    5535             {
    5536                 case kIemNativeWhat_Var:
    5537                 {
    5538                     uint8_t const       idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
    5539                     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    5540                     PIEMNATIVEVAR const pVar   = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    5541                     Assert(pVar->idxReg == idxReg);
    5542 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    5543                     Assert(!pVar->fSimdReg);
    5544 #endif
    5545                     if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
    5546                     {
    5547                         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
    5548                                idxVar, pVar->enmKind, pVar->idxReg));
    5549                         if (pVar->enmKind != kIemNativeVarKind_Stack)
    5550                             pVar->idxReg = UINT8_MAX;
    5551                         else
    5552                             off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
    5553                     }
    5554                     else
    5555                         fRegsToFree &= ~RT_BIT_32(idxReg);
    5556                     continue;
    5557                 }
    5558 
    5559                 case kIemNativeWhat_Arg:
    5560                     AssertMsgFailed(("What?!?: %u\n", idxReg));
    5561                     continue;
    5562 
    5563                 case kIemNativeWhat_rc:
    5564                 case kIemNativeWhat_Tmp:
    5565                     AssertMsgFailed(("Missing free: %u\n", idxReg));
    5566                     continue;
    5567 
    5568                 case kIemNativeWhat_FixedTmp:
    5569                 case kIemNativeWhat_pVCpuFixed:
    5570                 case kIemNativeWhat_pCtxFixed:
    5571                 case kIemNativeWhat_PcShadow:
    5572                 case kIemNativeWhat_FixedReserved:
    5573                 case kIemNativeWhat_Invalid:
    5574                 case kIemNativeWhat_End:
    5575                     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
    5576             }
    5577             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
    5578         }
    5579     }
    5580 
    5581     /*
    5582      * Do the actual freeing.
    5583      */
    5584     if (pReNative->Core.bmHstRegs & fRegsToFree)
    5585         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
    5586                pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
    5587     pReNative->Core.bmHstRegs &= ~fRegsToFree;
    5588 
    5589     /* If there are guest register shadows in any call-volatile register, we
    5590        have to clear the corresponding guest register masks for each register. */
    5591     uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
    5592     if (fHstRegsWithGstShadow)
    5593     {
    5594         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
    5595                pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
    5596         pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
    5597         do
    5598         {
    5599             unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
    5600             fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    5601 
    5602             AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
    5603 
    5604 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    5605             /*
    5606              * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
    5607              * to call volatile registers).
    5608              */
    5609             if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
    5610                 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
    5611             Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
    5612 #endif
    5613 
    5614             pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    5615             pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    5616         } while (fHstRegsWithGstShadow != 0);
    5617     }
    5618 
    5619 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    5620     /* Now for the SIMD registers, no argument support for now. */
    5621     off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
    5622 #endif
    5623 
    5624     return off;
    5625 }
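
/*
 * Illustrative sketch (not from the original sources): where the helper above
 * sits in a typical call-emission sequence.  iemNativeEmitCallImm is assumed
 * to exist in roughly this shape; argument loading is elided.
 */
#if 0
static uint32_t iemNativeExampleEmitHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnHelper)
{
    /* Evacuate anything live from the call-volatile registers first (two arguments here): */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/);
    /* ... load the two argument registers ... */
    off = iemNativeEmitCallImm(pReNative, off, pfnHelper); /* assumed emitter */
    return off;
}
#endif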
    5626 
    5627 
    5628 /**
    5629  * Flushes a set of guest register shadow copies.
    5630  *
    5631  * This is usually done after calling a threaded function or a C-implementation
    5632  * of an instruction.
    5633  *
    5634  * @param   pReNative       The native recompile state.
    5635  * @param   fGstRegs        Set of guest registers to flush.
    5636  */
    5637 DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
    5638 {
    5639     /*
    5640      * Reduce the mask by what's currently shadowed
    5641      */
    5642     uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
    5643     fGstRegs &= bmGstRegShadowsOld;
    5644     if (fGstRegs)
    5645     {
    5646         uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
    5647         Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
    5648         pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
    5649         if (bmGstRegShadowsNew)
    5650         {
    5651             /*
    5652              * Partial.
    5653              */
    5654             do
    5655             {
    5656                 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    5657                 uint8_t const  idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    5658                 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
    5659                 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
    5660                 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    5661 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    5662                 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
    5663 #endif
    5664 
    5665                 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
    5666                 fGstRegs &= ~fInThisHstReg;
    5667                 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
    5668                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
    5669                 if (!fGstRegShadowsNew)
    5670                     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    5671             } while (fGstRegs != 0);
    5672         }
    5673         else
    5674         {
    5675             /*
    5676              * Clear all.
    5677              */
    5678             do
    5679             {
    5680                 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    5681                 uint8_t const  idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    5682                 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
    5683                 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
    5684                 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    5685 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    5686                 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
    5687 #endif
    5688 
    5689                 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
    5690                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    5691             } while (fGstRegs != 0);
    5692             pReNative->Core.bmHstRegsWithGstShadow = 0;
    5693         }
    5694     }
    5695 }
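
/*
 * Illustrative sketch (not from the original sources): after calling a C
 * implementation that may change arbitrary guest state, every shadow copy is
 * potentially stale, so the whole set can be dropped in one call:
 */
#if 0
iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /* flush all currently shadowed guest registers */
#endif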
    5696 
    5697 
    5698 /**
    5699  * Flushes guest register shadow copies held by a set of host registers.
    5700  *
    5701  * This is used with the TLB lookup code for ensuring that we don't carry on
    5702  * with any guest shadows in volatile registers, as these will get corrupted by
    5703  * a TLB miss.
    5704  *
    5705  * @param   pReNative       The native recompile state.
    5706  * @param   fHstRegs        Set of host registers to flush guest shadows for.
    5707  */
    5708 DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
    5709 {
    5710     /*
    5711      * Reduce the mask by what's currently shadowed.
    5712      */
    5713     uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
    5714     fHstRegs &= bmHstRegsWithGstShadowOld;
    5715     if (fHstRegs)
    5716     {
    5717         uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
    5718         Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
    5719                fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
    5720         pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
    5721         if (bmHstRegsWithGstShadowNew)
    5722         {
    5723             /*
    5724              * Partial (likely).
    5725              */
    5726             uint64_t fGstShadows = 0;
    5727             do
    5728             {
    5729                 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    5730                 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
    5731                 Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    5732                        == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    5733 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    5734                 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    5735 #endif
    5736 
    5737                 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    5738                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    5739                 fHstRegs &= ~RT_BIT_32(idxHstReg);
    5740             } while (fHstRegs != 0);
    5741             pReNative->Core.bmGstRegShadows &= ~fGstShadows;
    5742         }
    5743         else
    5744         {
    5745             /*
    5746              * Clear all.
    5747              */
    5748             do
    5749             {
    5750                 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    5751                 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
    5752                 Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    5753                        == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    5754 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    5755                 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    5756 #endif
    5757 
    5758                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    5759                 fHstRegs &= ~RT_BIT_32(idxHstReg);
    5760             } while (fHstRegs != 0);
    5761             pReNative->Core.bmGstRegShadows = 0;
    5762         }
    5763     }
    5764 }
    5765 
    5766 
    5767 /**
    5768  * Restores guest shadow copies in volatile registers.
    5769  *
    5770  * This is used after calling a helper function (think TLB miss) to restore the
    5771  * register state of volatile registers.
    5772  *
    5773  * @param   pReNative               The native recompile state.
    5774  * @param   off                     The code buffer offset.
    5775  * @param   fHstRegsActiveShadows   Set of host registers which are allowed to
    5776  *                                  be active (allocated) w/o asserting. Hack.
    5777  * @see     iemNativeVarSaveVolatileRegsPreHlpCall(),
    5778  *          iemNativeVarRestoreVolatileRegsPostHlpCall()
    5779  */
    5780 DECL_HIDDEN_THROW(uint32_t)
    5781 iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
    5782 {
    5783     uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    5784     if (fHstRegs)
    5785     {
    5786         Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
    5787         do
    5788         {
    5789             unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    5790 
    5791             /* It's not fatal if a register is active holding a variable that
    5792                shadows a guest register, ASSUMING all pending guest register
    5793                writes were flushed prior to the helper call.  However, we'll be
    5794                emitting duplicate restores, so it wastes code space. */
    5795             Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
    5796             RT_NOREF(fHstRegsActiveShadows);
    5797 
    5798             uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    5799 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    5800             Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
    5801 #endif
    5802             Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
    5803             AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
    5804                        IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
    5805 
    5806             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    5807             off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
    5808 
    5809             fHstRegs &= ~RT_BIT_32(idxHstReg);
    5810         } while (fHstRegs != 0);
    5811     }
    5812     return off;
    5813 }
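
/*
 * Illustrative sketch (not from the original sources): the pairing suggested
 * by the @see references above for a TLB-miss style helper call.  The exact
 * signatures of the save/restore helpers are assumed, and fHstRegsNotToSave
 * is a made-up mask.
 */
#if 0
off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);     /* assumed signature */
/* ... emit the helper call ... */
off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave); /* assumed signature */
off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
#endif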
    5814 
    5815 
    5816 
    5817 
    5818 /*********************************************************************************************************************************
    5819 *   SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge)                            *
    5820 *********************************************************************************************************************************/
    5821 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    5822 
    5823 /**
    5824  * Info about shadowed guest SIMD register values.
    5825  * @see IEMNATIVEGSTSIMDREG
    5826  */
    5827 static struct
    5828 {
    5829     /** Offset in VMCPU of XMM (low 128-bit) registers. */
    5830     uint32_t    offXmm;
    5831     /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
    5832     uint32_t    offYmm;
    5833     /** Name (for logging). */
    5834     const char *pszName;
    5835 } const g_aGstSimdShadowInfo[] =
    5836 {
    5837 #define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
    5838                                          (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
    5839     /* [kIemNativeGstSimdReg_SimdRegFirst +  0] = */  { CPUMCTX_OFF_AND_SIZE(0),  "ymm0",  },
    5840     /* [kIemNativeGstSimdReg_SimdRegFirst +  1] = */  { CPUMCTX_OFF_AND_SIZE(1),  "ymm1",  },
    5841     /* [kIemNativeGstSimdReg_SimdRegFirst +  2] = */  { CPUMCTX_OFF_AND_SIZE(2),  "ymm2",  },
    5842     /* [kIemNativeGstSimdReg_SimdRegFirst +  3] = */  { CPUMCTX_OFF_AND_SIZE(3),  "ymm3",  },
    5843     /* [kIemNativeGstSimdReg_SimdRegFirst +  4] = */  { CPUMCTX_OFF_AND_SIZE(4),  "ymm4",  },
    5844     /* [kIemNativeGstSimdReg_SimdRegFirst +  5] = */  { CPUMCTX_OFF_AND_SIZE(5),  "ymm5",  },
    5845     /* [kIemNativeGstSimdReg_SimdRegFirst +  6] = */  { CPUMCTX_OFF_AND_SIZE(6),  "ymm6",  },
    5846     /* [kIemNativeGstSimdReg_SimdRegFirst +  7] = */  { CPUMCTX_OFF_AND_SIZE(7),  "ymm7",  },
    5847     /* [kIemNativeGstSimdReg_SimdRegFirst +  8] = */  { CPUMCTX_OFF_AND_SIZE(8),  "ymm8",  },
    5848     /* [kIemNativeGstSimdReg_SimdRegFirst +  9] = */  { CPUMCTX_OFF_AND_SIZE(9),  "ymm9",  },
    5849     /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */  { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
    5850     /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */  { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
    5851     /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */  { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
    5852     /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */  { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
    5853     /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */  { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
    5854     /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */  { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
    5855 #undef CPUMCTX_OFF_AND_SIZE
    5856 };
    5857 AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
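
/*
 * Illustrative sketch (not from the original sources): how the table above is
 * consumed.  Each entry gives the VMCPU-relative offsets of a guest YMM
 * register's low and high 128-bit halves, e.g. for writing back ymm3's low
 * half (see iemNativeSimdRegFlushPendingWrite below):
 */
#if 0
uint32_t const offXmm3 = g_aGstSimdShadowInfo[kIemNativeGstSimdReg_SimdRegFirst + 3].offXmm;
off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, offXmm3);
#endif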
    5858 
    5859 
    5860 /**
    5861  * Frees a temporary SIMD register.
    5862  *
    5863  * Any shadow copies of guest registers assigned to the host register will not
    5864  * be flushed by this operation.
    5865  */
    5866 DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
    5867 {
    5868     Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
    5869     Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
    5870     pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
    5871     Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
    5872            g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
    5873 }
    5874 
    5875 
    5876 /**
    5877  * Emits code to flush a pending write of the given SIMD register if any, and also flushes the guest-to-host SIMD register association.
    5878  *
    5879  * @returns New code buffer offset.
    5880  * @param   pReNative       The native recompile state.
    5881  * @param   off             Current code buffer position.
    5882  * @param   enmGstSimdReg   The guest SIMD register to flush.
    5883  */
    5884 DECL_HIDDEN_THROW(uint32_t)
    5885 iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
    5886 {
    5887     uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
    5888 
    5889     Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
    5890            g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
    5891            IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
    5892            IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
    5893 
    5894     if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
    5895     {
    5896         Assert(   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
    5897                || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
    5898         off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
    5899     }
    5900 
    5901     if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
    5902     {
    5903         Assert(   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
    5904                || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
    5905         off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
    5906     }
    5907 
    5908     IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
    5909     return off;
    5910 }
    5911 
    5912 
    5913 /**
    5914  * Flush the given set of guest SIMD registers if marked as dirty.
    5915  *
    5916  * @returns New code buffer offset.
    5917  * @param   pReNative           The native recompile state.
    5918  * @param   off                 Current code buffer position.
    5919  * @param   fFlushGstSimdReg    The guest SIMD register set to flush (default is flush everything).
    5920  */
    5921 DECL_HIDDEN_THROW(uint32_t)
    5922 iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
    5923 {
    5924     uint64_t bmGstSimdRegShadowDirty =   (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
    5925                                        & fFlushGstSimdReg;
    5926     if (bmGstSimdRegShadowDirty)
    5927     {
    5928 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    5929         iemNativeDbgInfoAddNativeOffset(pReNative, off);
    5930         iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
    5931 # endif
    5932 
    5933         do
    5934         {
    5935             unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
    5936             bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
    5937             off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
    5938         } while (bmGstSimdRegShadowDirty);
    5939     }
    5940 
    5941     return off;
    5942 }
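
/*
 * Illustrative sketch (not from the original sources): with delayed register
 * writeback, all dirty guest SIMD shadows are typically written back in one
 * sweep like this before an emitted path leaves the TB:
 */
#if 0
off = iemNativeSimdRegFlushDirtyGuest(pReNative, off); /* default fFlushGstSimdReg = UINT64_MAX flushes everything */
#endif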
    5943 
    5944 
    5945 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    5946 /**
    5947  * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
    5948  *
    5949  * @returns New code buffer offset.
    5950  * @param   pReNative       The native recompile state.
    5951  * @param   off             Current code buffer position.
    5952  * @param   idxHstSimdReg   The host SIMD register.
    5953  *
    5954  * @note This doesn't do any unshadowing of guest registers from the host register.
    5955  */
    5956 DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
    5957 {
    5958     /* We need to flush any pending guest register writes this host register shadows. */
    5959     uint64_t bmGstSimdRegShadowDirty =   (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
    5960                                        & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
    5961     if (bmGstSimdRegShadowDirty)
    5962     {
    5963 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    5964         iemNativeDbgInfoAddNativeOffset(pReNative, off);
    5965         iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
    5966 # endif
    5967 
    5968         do
    5969         {
    5970             unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
    5971             bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
    5972             off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
    5973             Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
    5974         } while (bmGstSimdRegShadowDirty);
    5975     }
    5976 
    5977     return off;
    5978 }
    5979 #endif
    5980 
    5981 
    5982 /**
    5983  * Locate a register, possibly freeing one up.
    5984  *
    5985  * This ASSUMES the caller has done the minimal/optimal allocation checks and
    5986  * failed.
    5987  *
    5988  * @returns Host register number on success. Returns UINT8_MAX if no register
    5989  *          was found; the caller is supposed to deal with this and raise an
    5990  *          allocation type specific status code (if desired).
    5991  *
    5992  * @throws  VBox status code if we run into trouble spilling a variable or
    5993  *          recording debug info.  Does NOT throw anything if we're out of
    5994  *          registers, though.
    5995  */
    5996 static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
    5997                                              uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
    5998 {
    5999     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
    6000     Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
    6001     Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
    6002 
    6003     /*
    6004      * Try a freed register that's shadowing a guest register.
    6005      */
    6006     uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
    6007     if (fRegs)
    6008     {
    6009         STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
    6010 
    6011 #if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
    6012         /*
    6013          * When we have liveness information, we use it to kick out all shadowed
    6014          * guest registers that will not be needed any more in this TB.  If we're
    6015          * lucky, this may prevent us from ending up here again.
    6016          *
    6017          * Note! We must consider the previous entry here so we don't free
    6018          *       anything that the current threaded function requires (current
    6019          *       entry is produced by the next threaded function).
    6020          */
    6021         uint32_t const idxCurCall = pReNative->idxCurCall;
    6022         if (idxCurCall > 0)
    6023         {
    6024             PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
    6025 
    6026 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    6027             /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
    6028             AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
    6029             uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
    6030 # else
    6031             /* Construct a mask of the registers not in the read or write state.
    6032                Note! We could skip writes, if they aren't from us, as this is just
    6033                      a hack to prevent trashing registers that have just been written
    6034                      or will be written when we retire the current instruction. */
    6035             uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
    6036                                  & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
    6037                                  & IEMLIVENESSBIT_MASK;
    6038 # endif
    6039             /* If it matches any shadowed registers. */
    6040             if (pReNative->Core.bmGstSimdRegShadows & fToFreeMask)
    6041             {
    6042                 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
    6043                 iemNativeSimdRegFlushGuestShadows(pReNative, fToFreeMask);
    6044                 Assert(fRegs == (~pReNative->Core.bmHstSimdRegs & fRegMask)); /* this shall not change. */
    6045 
    6046                 /* See if we've got any unshadowed registers we can return now. */
    6047                 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow;
    6048                 if (fUnshadowedRegs)
    6049                 {
    6050                     STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
    6051                     return (fPreferVolatile
    6052                             ? ASMBitFirstSetU32(fUnshadowedRegs)
    6053                             : ASMBitLastSetU32(  fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
    6054                                                ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fUnshadowedRegs))
    6055                          - 1;
    6056                 }
    6057             }
    6058         }
    6059 #endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
    6060 
    6061         unsigned const idxReg = (fPreferVolatile
    6062                                  ? ASMBitFirstSetU32(fRegs)
    6063                                  : ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
    6064                                                     ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
    6065                               - 1;
    6066 
    6067         Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
    6068         Assert(   (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
    6069                == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
    6070         Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
    6071 
    6072         /* We need to flush any pending guest register writes this host SIMD register shadows. */
    6073         *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
    6074 
    6075         pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    6076         pReNative->Core.bmGstSimdRegShadows        &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
    6077         pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
    6078         pReNative->Core.aHstSimdRegs[idxReg].enmLoaded      = kIemNativeGstSimdRegLdStSz_Invalid;
    6079         return idxReg;
    6080     }
    6081 
    6082     AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
    6083 
    6084     /*
    6085      * Try free up a variable that's in a register.
    6086      *
    6087      * We do two rounds here, first evacuating variables we don't need to be
    6088      * saved on the stack, then in the second round move things to the stack.
    6089      */
    6090     STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
    6091     for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
    6092     {
    6093         uint32_t fVars = pReNative->Core.bmVars;
    6094         while (fVars)
    6095         {
    6096             uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
    6097             uint8_t const  idxReg = pReNative->Core.aVars[idxVar].idxReg;
    6098             if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non-SIMD variables here. */
    6099             {   fVars &= ~RT_BIT_32(idxVar); continue; /* clear the bit or we'd loop forever */ }
    6100 
    6101             if (   idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
    6102                 && (RT_BIT_32(idxReg) & fRegMask)
    6103                 && (  iLoop == 0
    6104                     ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
    6105                     : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    6106                 && !pReNative->Core.aVars[idxVar].fRegAcquired)
    6107             {
    6108                 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
    6109                 Assert(   (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
    6110                        == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
    6111                 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
    6112                 Assert(   RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
    6113                        == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
    6114 
    6115                 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    6116                 {
    6117                     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
    6118                     *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
    6119                 }
    6120 
    6121                 pReNative->Core.aVars[idxVar].idxReg        = UINT8_MAX;
    6122                 pReNative->Core.bmHstSimdRegs              &= ~RT_BIT_32(idxReg);
    6123 
    6124                 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    6125                 pReNative->Core.bmGstSimdRegShadows        &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
    6126                 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
    6127                 return idxReg;
    6128             }
    6129             fVars &= ~RT_BIT_32(idxVar);
    6130         }
    6131     }
    6132 
    6133     AssertFailed();
    6134     return UINT8_MAX;
    6135 }
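
/*
 * A self-contained sketch of the preference logic used above when picking a
 * register out of a candidate bitmap: prefer the non-volatile subset when
 * asked to, falling back to the full set when that subset is empty.
 * ASMBitFirstSetU32/ASMBitLastSetU32 are the IPRT bit scans (1-based, 0 when
 * no bit is set); the function itself is hypothetical.
 */
#if 0 /* illustrative sketch */
static unsigned iemNativeSketchPickReg(uint32_t fCandidates, bool fPreferVolatile)
{
    Assert(fCandidates != 0);
    if (fPreferVolatile)
        return ASMBitFirstSetU32(fCandidates) - 1;  /* lowest candidate */
    uint32_t const fNonVolatile = fCandidates & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
    return ASMBitLastSetU32(fNonVolatile ? fNonVolatile : fCandidates) - 1; /* highest, preferring non-volatile */
}
#endif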
    6136 
    6137 
    6138 /**
    6139  * Flushes a set of guest register shadow copies.
    6140  *
    6141  * This is usually done after calling a threaded function or a C-implementation
    6142  * of an instruction.
    6143  *
    6144  * @param   pReNative       The native recompile state.
    6145  * @param   fGstSimdRegs    Set of guest SIMD registers to flush.
    6146  */
    6147 DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
    6148 {
    6149     /*
    6150      * Reduce the mask by what's currently shadowed
    6151      */
    6152     uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
    6153     fGstSimdRegs &= bmGstSimdRegShadows;
    6154     if (fGstSimdRegs)
    6155     {
    6156         uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
    6157         Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
    6158         pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
    6159         if (bmGstSimdRegShadowsNew)
    6160         {
    6161             /*
    6162              * Partial.
    6163              */
    6164             do
    6165             {
    6166                 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
    6167                 uint8_t const  idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
    6168                 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
    6169                 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
    6170                 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    6171                 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
    6172 
    6173                 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
    6174                 fGstSimdRegs &= ~fInThisHstReg;
    6175                 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
    6176                 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
    6177                 if (!fGstRegShadowsNew)
    6178                 {
    6179                     pReNative->Core.bmHstSimdRegsWithGstShadow        &= ~RT_BIT_32(idxHstReg);
    6180                     pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded  = kIemNativeGstSimdRegLdStSz_Invalid;
    6181                 }
    6182             } while (fGstSimdRegs != 0);
    6183         }
    6184         else
    6185         {
    6186             /*
    6187              * Clear all.
    6188              */
    6189             do
    6190             {
    6191                 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
    6192                 uint8_t const  idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
    6193                 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
    6194                 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
    6195                 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    6196                 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
    6197 
    6198                 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
    6199                 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
    6200                 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded      = kIemNativeGstSimdRegLdStSz_Invalid;
    6201             } while (fGstSimdRegs != 0);
    6202             pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
    6203         }
    6204     }
    6205 }
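
/*
 * A sketch of the invariant the flushing above maintains between the three
 * shadow tracking structures: a guest SIMD register is shadowed iff its bit
 * is set in the global bitmap, the reverse index then names a host register,
 * and that host register's shadow set contains the guest bit.  The structure
 * is a hypothetical trimmed-down stand-in for the real Core state.
 */
#if 0 /* illustrative sketch */
typedef struct IEMSKETCHSIMDSHADOWCORE
{
    uint64_t bmGstSimdRegShadows;        /* one bit per shadowed guest SIMD register */
    uint32_t bmHstSimdRegsWithGstShadow; /* one bit per host register holding shadows */
    uint8_t  aidxGstSimdRegShadows[64];  /* guest -> host reverse index */
    uint64_t afGstRegShadows[32];        /* host -> set of shadowed guest registers */
} IEMSKETCHSIMDSHADOWCORE;

static bool iemNativeSketchShadowInvariantHolds(IEMSKETCHSIMDSHADOWCORE const *pCore, unsigned idxGstReg)
{
    if (!(pCore->bmGstSimdRegShadows & RT_BIT_64(idxGstReg)))
        return true; /* not shadowed, nothing to check */
    uint8_t const idxHstReg = pCore->aidxGstSimdRegShadows[idxGstReg];
    return (pCore->bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg))
        && (pCore->afGstRegShadows[idxHstReg]  & RT_BIT_64(idxGstReg));
}
#endif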
    6206 
    6207 
    6208 /**
    6209  * Allocates a temporary host SIMD register.
    6210  *
    6211  * This may emit code to save register content onto the stack in order to free
    6212  * up a register.
    6213  *
    6214  * @returns The host register number; throws VBox status code on failure,
    6215  *          so no need to check the return value.
    6216  * @param   pReNative       The native recompile state.
    6217  * @param   poff            Pointer to the variable with the code buffer position.
    6218  *                          This will be updated if we need to move a variable from
    6219  *                          register to stack in order to satisfy the request.
    6220  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    6221  *                          registers (@c true, default) or the other way around
    6222  *                          (@c false, for iemNativeSimdRegAllocTmpForGuestSimdReg()).
    6223  */
    6224 DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
    6225 {
    6226     /*
    6227      * Try find a completely unused register, preferably a call-volatile one.
    6228      */
    6229     uint8_t  idxSimdReg;
    6230     uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
    6231                    & ~pReNative->Core.bmHstSimdRegsWithGstShadow
    6232                    & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
    6233     if (fRegs)
    6234     {
    6235         if (fPreferVolatile)
    6236             idxSimdReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
    6237                                                     ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
    6238         else
    6239             idxSimdReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
    6240                                                     ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
    6241         Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
    6242         Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
    6243 
    6244         pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
    6245         Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
    6246     }
    6247     else
    6248     {
    6249         idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
    6250         AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
    6251         Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
    6252     }
    6253 
    6254     Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
    6255     return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
    6256 }
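
/*
 * Typical call-site pattern for the allocator above, sketched: grab a scratch
 * vector register, emit code with it, release it again.  The release helper
 * iemNativeSimdRegFreeTmp is assumed to mirror the GPR side; the emit step is
 * a placeholder.
 */
#if 0 /* illustrative sketch */
uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
/* ... emit instructions clobbering idxSimdTmp ... */
iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
#endif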
    6257 
    6258 
    6259 /**
    6260  * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
    6261  * registers.
    6262  *
    6263  * @returns The host register number; throws VBox status code on failure,
    6264  *          so no need to check the return value.
    6265  * @param   pReNative       The native recompile state.
    6266  * @param   poff            Pointer to the variable with the code buffer position.
    6267  *                          This will be updated if we need to move a variable from
    6268  *                          register to stack in order to satisfy the request.
    6269  * @param   fRegMask        Mask of acceptable registers.
    6270  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    6271  *                          registers (@c true, default) or the other way around
    6272  *                          (@c false, for iemNativeSimdRegAllocTmpForGuestSimdReg()).
    6273  */
    6274 DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
    6275                                                       bool fPreferVolatile /*= true*/)
    6276 {
    6277     Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
    6278     Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
    6279 
    6280     /*
    6281      * Try find a completely unused register, preferably a call-volatile one.
    6282      */
    6283     uint8_t  idxSimdReg;
    6284     uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
    6285                    & ~pReNative->Core.bmHstSimdRegsWithGstShadow
    6286                    & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
    6287                    & fRegMask;
    6288     if (fRegs)
    6289     {
    6290         if (fPreferVolatile)
    6291             idxSimdReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
    6292                                                     ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
    6293         else
    6294             idxSimdReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
    6295                                                     ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
    6296         Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
    6297         Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
    6298 
    6299         pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
    6300         Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
    6301     }
    6302     else
    6303     {
    6304         idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
    6305         AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
    6306         Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
    6307     }
    6308 
    6309     Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
    6310     return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
    6311 }
    6312 
    6313 
    6314 /**
    6315  * Sets the indicator for which part of the given SIMD register has valid data loaded.
    6316  *
    6317  * @param   pReNative       The native recompile state.
    6318  * @param   idxHstSimdReg   The host SIMD register to update the state for.
    6319  * @param   enmLoadSz       The load size to set.
    6320  */
    6321 DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
    6322                                                          IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
    6323 {
    6324     /* Everything valid already? -> nothing to do. */
    6325     if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
    6326         return;
    6327 
    6328     if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
    6329         pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
    6330     else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
    6331     {
    6332         Assert(   (   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
    6333                    && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
    6334                || (   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
    6335                    && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
    6336         pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
    6337     }
    6338 }
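
/*
 * The enmLoaded merge above as a table, for reference: Invalid adopts
 * whatever is loaded, loading the missing 128-bit half of a partially loaded
 * register promotes it to 256, a fully loaded register absorbs everything,
 * and re-loading the same half is a no-op.  Other combinations (e.g. a
 * 256-bit load on top of Low128) are caller bugs caught by the assertion.
 *
 *      current \ new | Low128    | High128   | 256
 *      --------------+-----------+-----------+----------
 *      Invalid       | Low128    | High128   | 256
 *      Low128        | Low128    | 256       | (asserts)
 *      High128       | 256       | High128   | (asserts)
 *      256           | 256       | 256       | 256
 */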
    6339 
    6340 
    6341 static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
    6342                                                             uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
    6343 {
    6344     /* Easy case first: either the destination wants the same range the source has already loaded, or the source has loaded everything. */
    6345     if (   pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
    6346         || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
    6347     {
    6348 # ifdef RT_ARCH_ARM64
    6349         /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    6350         Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
    6351 # endif
    6352 
    6353         if (idxHstSimdRegDst != idxHstSimdRegSrc)
    6354         {
    6355             switch (enmLoadSzDst)
    6356             {
    6357                 case kIemNativeGstSimdRegLdStSz_256:
    6358                     off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
    6359                     break;
    6360                 case kIemNativeGstSimdRegLdStSz_Low128:
    6361                     off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
    6362                     break;
    6363                 case kIemNativeGstSimdRegLdStSz_High128:
    6364                     off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
    6365                     break;
    6366                 default:
    6367                     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
    6368             }
    6369 
    6370             iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
    6371         }
    6372     }
    6373     else
    6374     {
    6375         /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
    6376         Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
    6377         off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
    6378     }
    6379 
    6380     return off;
    6381 }
    6382 
    6383 
    6384 /**
    6385  * Allocates a temporary host SIMD register for keeping a guest
    6386  * SIMD register value.
    6387  *
    6388  * Since we may already have a register holding the guest register value,
    6389  * code will be emitted to do the loading if that's not the case. Code may also
    6390  * be emitted if we have to free up a register to satisfy the request.
    6391  *
    6392  * @returns The host register number; throws VBox status code on failure, so no
    6393  *          need to check the return value.
    6394  * @param   pReNative       The native recompile state.
    6395  * @param   poff            Pointer to the variable with the code buffer
    6396  *                          position. This will be updated if we need to move a
    6397  *                          variable from register to stack in order to satisfy
    6398  *                          the request.
    6399  * @param   enmGstSimdReg   The guest SIMD register that is to be updated.
          * @param   enmLoadSz       Which part of the register needs to be valid
          *                          (low/high 128 bits or the full 256 bits).
    6400  * @param   enmIntendedUse  How the caller will be using the host register.
    6401  * @param   fNoVolatileRegs Set if no volatile register allowed, clear if any
    6402  *                          register is okay (default).  The ASSUMPTION here is
    6403  *                          that the caller has already flushed all volatile
    6404  *                          registers, so this is only applied if we allocate a
    6405  *                          new register.
    6406  * @sa      iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
    6407  */
    6408 DECL_HIDDEN_THROW(uint8_t)
    6409 iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
    6410                                         IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
    6411                                         bool fNoVolatileRegs /*= false*/)
    6412 {
    6413     Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
    6414 #if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
    6415     AssertMsg(   pReNative->idxCurCall == 0
    6416               || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
    6417                   ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
    6418                   : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
    6419                   ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
    6420                   : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(  iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
    6421               ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
    6422 #endif
    6423 #if defined(LOG_ENABLED) || defined(VBOX_STRICT)
    6424     static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
    6425 #endif
    6426     uint32_t const fRegMask = !fNoVolatileRegs
    6427                             ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
    6428                             : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
    6429 
    6430     /*
    6431      * First check if the guest register value is already in a host register.
    6432      */
    6433     if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
    6434     {
    6435         uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
    6436         Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
    6437         Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
    6438         Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
    6439 
    6440         /* It's not supposed to be allocated... */
    6441         if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
    6442         {
    6443             /*
    6444              * If the register will trash the guest shadow copy, try find a
    6445              * completely unused register we can use instead.  If that fails,
    6446              * we need to disassociate the host reg from the guest reg.
    6447              */
    6448             /** @todo would be nice to know if preserving the register is in any way helpful. */
    6449             /* If the purpose is calculations, try duplicate the register value as
    6450                we'll be clobbering the shadow. */
    6451             if (   enmIntendedUse == kIemNativeGstRegUse_Calculation
    6452                 && (  ~pReNative->Core.bmHstSimdRegs
    6453                     & ~pReNative->Core.bmHstSimdRegsWithGstShadow
    6454                     & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
    6455             {
    6456                 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
    6457 
    6458                 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
    6459 
    6460                 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
    6461                        g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
    6462                        g_apszIemNativeHstSimdRegNames[idxRegNew]));
    6463                 idxSimdReg = idxRegNew;
    6464             }
    6465             /* If the current register matches the restrictions, go ahead and allocate
    6466                it for the caller. */
    6467             else if (fRegMask & RT_BIT_32(idxSimdReg))
    6468             {
    6469                 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
    6470                 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
    6471                 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    6472                 {
    6473                     if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    6474                         *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
    6475                     else
    6476                         iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
    6477                     Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
    6478                            g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    6479                 }
    6480                 else
    6481                 {
    6482                     iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
    6483                     Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
    6484                            g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
    6485                 }
    6486             }
    6487             /* Otherwise, allocate a register that satisfies the caller and transfer
    6488                the shadowing if compatible with the intended use.  (This basically
    6489                means the caller wants a non-volatile register (RSP push/pop scenario).) */
    6490             else
    6491             {
    6492                 Assert(fNoVolatileRegs);
    6493                 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
    6494                                                                     !fNoVolatileRegs
    6495                                                                  && enmIntendedUse == kIemNativeGstRegUse_Calculation);
    6496                 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
    6497                 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    6498                 {
    6499                     iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
    6500                     Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
    6501                            g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
    6502                            g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    6503                 }
    6504                 else
    6505                     Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
    6506                            g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
    6507                            g_apszIemNativeHstSimdRegNames[idxRegNew]));
    6508                 idxSimdReg = idxRegNew;
    6509             }
    6510         }
    6511         else
    6512         {
    6513             /*
    6514              * Oops. Shadowed guest register already allocated!
    6515              *
    6516              * Allocate a new register, copy the value and, if updating, the
    6517              * guest shadow copy assignment to the new register.
    6518              */
    6519             AssertMsg(   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
    6520                       && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
    6521                       ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
    6522                        idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
    6523 
    6524             /** @todo share register for readonly access. */
    6525             uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
    6526                                                                  enmIntendedUse == kIemNativeGstRegUse_Calculation);
    6527 
    6528             if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    6529                 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
    6530             else
    6531                 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
    6532 
    6533             if (   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
    6534                 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    6535                 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
    6536                        g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
    6537                        g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
    6538             else
    6539             {
    6540                 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
    6541                 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
    6542                        g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
    6543                        g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
    6544             }
    6545             idxSimdReg = idxRegNew;
    6546         }
    6547         Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
    6548 
    6549 #ifdef VBOX_STRICT
    6550         /* Strict builds: Check that the value is correct. */
    6551         if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    6552             *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
    6553 #endif
    6554 
    6555         if (   enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
    6556             || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
    6557         {
    6558 # if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
    6559             iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
    6560             iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
    6561 # endif
    6562 
    6563             if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
    6564                 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
    6565             else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
    6566                 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
    6567             else
    6568             {
    6569                 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
    6570                 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
    6571                 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
    6572             }
    6573         }
    6574 
    6575         return idxSimdReg;
    6576     }
    6577 
    6578     /*
    6579      * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
    6580      */
    6581     uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
    6582 
    6583     if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    6584         *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
    6585     else
    6586         iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
    6587 
    6588     if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    6589         iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
    6590 
    6591     if (   enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
    6592         || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
    6593     {
    6594 # if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
    6595         iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
    6596         iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
    6597 # endif
    6598 
    6599         if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
    6600             IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
    6601         else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
    6602             IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
    6603         else
    6604         {
    6605             Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
    6606             IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
    6607             IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
    6608         }
    6609     }
    6610 
    6611     Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
    6612            g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    6613 
    6614     return idxRegNew;
    6615 }
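
/*
 * Usage sketch for the allocator above, roughly as an MC block emitter would
 * use it: fetch a guest SIMD register for update, emit the modification, and
 * release the host register again.  IEMNATIVEGSTSIMDREG_SIMD(0) stands in
 * for whatever enum value names ymm0, and the emit step is a placeholder;
 * with delayed write-back enabled, the dirty tracking above defers the
 * CPUMCTX store until the next flush.
 */
#if 0 /* illustrative sketch */
uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
                                                                   IEMNATIVEGSTSIMDREG_SIMD(0),
                                                                   kIemNativeGstSimdRegLdStSz_256,
                                                                   kIemNativeGstRegUse_ForUpdate);
/* ... emit instructions modifying idxSimdReg ... */
iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
#endif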
    6616 
    6617 
    6618 /**
    6619  * Flushes guest SIMD register shadow copies held by a set of host registers.
    6620  *
    6621  * This is used whenever calling an external helper to ensure that we don't carry on
    6622  * with any guest shadows in volatile registers, as these will get corrupted by the callee.
    6623  *
    6624  * @param   pReNative       The native recompile state.
    6625  * @param   fHstSimdRegs    Set of host SIMD registers to flush guest shadows for.
    6626  */
    6627 DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
    6628 {
    6629     /*
    6630      * Reduce the mask by what's currently shadowed.
    6631      */
    6632     uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
    6633     fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
    6634     if (fHstSimdRegs)
    6635     {
    6636         uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
    6637         Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
    6638                fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
    6639         pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
    6640         if (bmHstSimdRegsWithGstShadowNew)
    6641         {
    6642             /*
    6643              * Partial (likely).
    6644              */
    6645             uint64_t fGstShadows = 0;
    6646             do
    6647             {
    6648                 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
    6649                 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
    6650                 Assert(   (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
    6651                        == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
    6652                 Assert(!((  pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
    6653                           & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
    6654 
    6655                 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
    6656                 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
    6657                 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
    6658             } while (fHstSimdRegs != 0);
    6659             pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
    6660         }
    6661         else
    6662         {
    6663             /*
    6664              * Clear all.
    6665              */
    6666             do
    6667             {
    6668                 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
    6669                 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
    6670                 Assert(   (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
    6671                        == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
    6672                 Assert(!(  (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
    6673                          & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
    6674 
    6675                 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
    6676                 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
    6677             } while (fHstSimdRegs != 0);
    6678             pReNative->Core.bmGstSimdRegShadows = 0;
    6679         }
    6680     }
    6681 }
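
/*
 * Call-site sketch: before emitting a call to an external helper, write back
 * any dirty guest SIMD values and then drop the shadowing for all
 * call-volatile host SIMD registers so stale associations don't survive the
 * call.  The helper-call emission itself is elided.
 */
#if 0 /* illustrative sketch */
off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, UINT64_MAX /* all guest SIMD registers */);
iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
/* ... emit the helper call ... */
#endif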
    6682 #endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
    6683 
    6684 
    6685 
    6686 /*********************************************************************************************************************************
    6687 *   Code emitters for flushing pending guest register writes and sanity checks                                                   *
    6688 *********************************************************************************************************************************/
    6689 
    6690 #ifdef VBOX_STRICT
    6691 /**
    6692  * Does internal register allocator sanity checks.
    6693  */
    6694 DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
    6695 {
    6696     /*
    6697      * Iterate host registers building a guest shadowing set.
    6698      */
    6699     uint64_t bmGstRegShadows        = 0;
    6700     uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
    6701     AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
    6702     while (bmHstRegsWithGstShadow)
    6703     {
    6704         unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
    6705         Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    6706         bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    6707 
    6708         uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    6709         AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
    6710         AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
    6711         bmGstRegShadows |= fThisGstRegShadows;
    6712         while (fThisGstRegShadows)
    6713         {
    6714             unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
    6715             fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
    6716             AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
    6717                       ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
    6718                        idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
    6719         }
    6720     }
    6721     AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
    6722               ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
    6723                bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
    6724 
    6725     /*
    6726      * Now the other way around, checking the guest to host index array.
    6727      */
    6728     bmHstRegsWithGstShadow = 0;
    6729     bmGstRegShadows        = pReNative->Core.bmGstRegShadows;
    6730     Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    6731     while (bmGstRegShadows)
    6732     {
    6733         unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
    6734         Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
    6735         bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
    6736 
    6737         uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    6738         AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
    6739         AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
    6740                   ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
    6741                    idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    6742         bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
    6743     }
    6744     AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
    6745               ("%RX32 vs %RX32; diff %RX32\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
    6746                bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
    6747 }
    6748 #endif /* VBOX_STRICT */
    6749 
    6750 
    6751 /**
    6752  * Flushes any delayed guest register writes.
    6753  *
    6754  * This must be called prior to calling CImpl functions and any helpers that use
    6755  * the guest state (like raising exceptions) and such.
    6756  *
    6757  * @note This function does not flush any shadowing information for guest registers;
    6758  *       that is left to the caller if desired.
    6759  */
    6760 DECL_HIDDEN_THROW(uint32_t)
    6761 iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
    6762 {
    6763 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    6764     if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
    6765         off = iemNativeEmitPcWriteback(pReNative, off);
    6766 #else
    6767     RT_NOREF(pReNative, fGstShwExcept);
    6768 #endif
    6769 
    6770 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    6771     off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
    6772 #endif
    6773 
    6774 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    6775     off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
    6776 #endif
    6777 
    6778     return off;
    6779 }
    6780 
    6781 
    6782 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    6783 /**
    6784  * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
    6785  */
    6786 DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    6787 {
    6788     Assert(pReNative->Core.offPc);
    6789 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    6790     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    6791     iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
    6792 # endif
    6793 
    6794 # ifndef IEMNATIVE_REG_FIXED_PC_DBG
    6795     /* Allocate a temporary PC register. */
    6796     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    6797 
    6798     /* Perform the addition and store the result. */
    6799     off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
    6800     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    6801 
    6802     /* Free but don't flush the PC register. */
    6803     iemNativeRegFreeTmp(pReNative, idxPcReg);
    6804 # else
    6805     /* Compare the shadow with the context value; they should match. */
    6806     off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
    6807     off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
    6808 # endif
    6809 
    6810     STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
    6811     pReNative->Core.offPc                 = 0;
    6812     pReNative->Core.cInstrPcUpdateSkipped = 0;
    6813 
    6814     return off;
    6815 }
    6816 #endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
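
/*
 * The delayed PC updating scheme above in a nutshell, sketched: instead of
 * emitting a RIP store per recompiled instruction, only software counters
 * are advanced, and a single addition is emitted when the guest RIP actually
 * has to be live (helper calls, exceptions, TB exits).  Field names mirror
 * the real Core state; cbInstr and the surrounding calls are placeholders.
 */
#if 0 /* illustrative sketch */
/* Per recompiled instruction of length cbInstr: */
pReNative->Core.offPc                 += cbInstr;  /* accumulate instead of storing */
pReNative->Core.cInstrPcUpdateSkipped += 1;
/* ... and only when RIP must be in sync with CPUMCTX: */
off = iemNativeEmitPcWriteback(pReNative, off);    /* adds Core.offPc to rip, resets the counters */
#endif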
    6817 
    6818 
    6819 /*********************************************************************************************************************************
    6820 *   Code Emitters (larger snippets)                                                                                              *
    6821 *********************************************************************************************************************************/
    6822 
    6823 /**
    6824  * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
    6825  * extending to 64-bit width.
    6826  *
    6827  * @returns New code buffer offset on success, UINT32_MAX on failure.
    6828  * @param   pReNative   The native recompile state.
    6829  * @param   off         The current code buffer position.
    6830  * @param   idxHstReg   The host register to load the guest register value into.
    6831  * @param   enmGstReg   The guest register to load.
    6832  *
    6833  * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
    6834  *       that is something the caller needs to do if applicable.
    6835  */
    6836 DECL_HIDDEN_THROW(uint32_t)
    6837 iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
    6838 {
    6839     Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
    6840     Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
    6841 
    6842     switch (g_aGstShadowInfo[enmGstReg].cb)
    6843     {
    6844         case sizeof(uint64_t):
    6845             return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    6846         case sizeof(uint32_t):
    6847             return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    6848         case sizeof(uint16_t):
    6849             return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    6850 #if 0 /* not present in the table. */
    6851         case sizeof(uint8_t):
    6852             return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    6853 #endif
    6854         default:
    6855             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
    6856     }
    6857 }
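
/*
 * The dispatch above is driven by the g_aGstShadowInfo table; a hypothetical
 * trimmed-down sketch of the two fields it relies on:
 */
#if 0 /* illustrative sketch */
typedef struct IEMSKETCHGSTSHADOWINFO
{
    uint32_t off;   /* byte offset of the guest register within VMCPU */
    uint8_t  cb;    /* register size in bytes: 8, 4 or 2 (no 1-byte entries in the table) */
} IEMSKETCHGSTSHADOWINFO;
#endif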
    6858 
    6859 
    6860 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    6861 /**
    6862  * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
    6863  *
    6864  * @returns New code buffer offset on success, UINT32_MAX on failure.
    6865  * @param   pReNative       The recompiler state.
    6866  * @param   off             The current code buffer position.
    6867  * @param   idxHstSimdReg   The host register to load the guest register value into.
    6868  * @param   enmGstSimdReg   The guest register to load.
    6869  * @param   enmLoadSz       The load size of the register.
    6870  *
    6871  * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
    6872  *       that is something the caller needs to do if applicable.
    6873  */
    6874 DECL_HIDDEN_THROW(uint32_t)
    6875 iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
    6876                                              IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
    6877 {
    6878     Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
    6879 
    6880     iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
    6881     switch (enmLoadSz)
    6882     {
    6883         case kIemNativeGstSimdRegLdStSz_256:
    6884             off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
    6885             return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
    6886         case kIemNativeGstSimdRegLdStSz_Low128:
    6887             return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
    6888         case kIemNativeGstSimdRegLdStSz_High128:
    6889             return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
    6890         default:
    6891             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
    6892     }
    6893 }
    6894 #endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
    6895 
    6896 #ifdef VBOX_STRICT
    6897 
    6898 /**
    6899  * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
    6900  *
    6901  * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
    6902  *       Trashes EFLAGS on AMD64.
    6903  */
    6904 DECL_HIDDEN_THROW(uint32_t)
    6905 iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
    6906 {
    6907 # ifdef RT_ARCH_AMD64
    6908     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
    6909 
    6910     /* rol reg64, 32 */
    6911     pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
    6912     pbCodeBuf[off++] = 0xc1;
    6913     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    6914     pbCodeBuf[off++] = 32;
    6915 
    6916     /* test reg32, ffffffffh */
    6917     if (idxReg >= 8)
    6918         pbCodeBuf[off++] = X86_OP_REX_B;
    6919     pbCodeBuf[off++] = 0xf7;
    6920     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    6921     pbCodeBuf[off++] = 0xff;
    6922     pbCodeBuf[off++] = 0xff;
    6923     pbCodeBuf[off++] = 0xff;
    6924     pbCodeBuf[off++] = 0xff;
    6925 
    6926     /* je/jz +1 */
    6927     pbCodeBuf[off++] = 0x74;
    6928     pbCodeBuf[off++] = 0x01;
    6929 
    6930     /* int3 */
    6931     pbCodeBuf[off++] = 0xcc;
    6932 
    6933     /* rol reg64, 32 */
    6934     pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
    6935     pbCodeBuf[off++] = 0xc1;
    6936     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    6937     pbCodeBuf[off++] = 32;
    6938 
    6939 # elif defined(RT_ARCH_ARM64)
    6940     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    6941     /* lsr tmp0, reg64, #32 */
    6942     pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
    6943     /* cbz tmp0, +1 */
    6944     pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    6945     /* brk #0x1100 */
    6946     pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
    6947 
    6948 # else
    6949 #  error "Port me!"
    6950 # endif
    6951     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    6952     return off;
    6953 }
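
/*
 * What the emitted sequence above checks, expressed in plain C; the real
 * thing runs inline in the translation block and traps via int3 / brk
 * instead of asserting:
 */
#if 0 /* illustrative sketch */
static void iemNativeSketchTop32BitsClearCheck(uint64_t uReg)
{
    if (uReg >> 32)      /* any of bits 63:32 set? */
        RT_BREAKPOINT(); /* corresponds to the int3 / brk #0x1100 above */
}
#endif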
    6954 
    6955 
    6956 /**
    6957  * Emits code that checks that the content of register @a idxReg is the same
    6958  * as what's in the guest register @a enmGstReg, resulting in a breakpoint
    6959  * instruction if that's not the case.
    6960  *
    6961  * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
    6962  *       Trashes EFLAGS on AMD64.
    6963  */
    6964 DECL_HIDDEN_THROW(uint32_t)
    6965 iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
    6966 {
    6967 # if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
    6968     /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
    6969     if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
    6970         return off;
    6971 # endif
    6972 
    6973 # ifdef RT_ARCH_AMD64
    6974     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    6975 
    6976     /* cmp reg, [mem] */
    6977     if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
    6978     {
    6979         if (idxReg >= 8)
    6980             pbCodeBuf[off++] = X86_OP_REX_R;
    6981         pbCodeBuf[off++] = 0x38;
    6982     }
    6983     else
    6984     {
    6985         if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
    6986             pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
    6987         else
    6988         {
    6989             if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
    6990                 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    6991             else
    6992                 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
    6993                            IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
    6994             if (idxReg >= 8)
    6995                 pbCodeBuf[off++] = X86_OP_REX_R;
    6996         }
    6997         pbCodeBuf[off++] = 0x39;
    6998     }
    6999     off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
    7000 
    7001     /* je/jz +1 */
    7002     pbCodeBuf[off++] = 0x74;
    7003     pbCodeBuf[off++] = 0x01;
    7004 
    7005     /* int3 */
    7006     pbCodeBuf[off++] = 0xcc;
    7007 
    7008     /* For values smaller than the register size, we must check that the rest
    7009        of the register is all zeros. */
    7010     if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
    7011     {
    7012         /* test reg64, imm32 */
    7013         pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
    7014         pbCodeBuf[off++] = 0xf7;
    7015         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    7016         pbCodeBuf[off++] = 0;
    7017         pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
    7018         pbCodeBuf[off++] = 0xff;
    7019         pbCodeBuf[off++] = 0xff;
    7020 
    7021         /* je/jz +1 */
    7022         pbCodeBuf[off++] = 0x74;
    7023         pbCodeBuf[off++] = 0x01;
    7024 
    7025         /* int3 */
    7026         pbCodeBuf[off++] = 0xcc;
    7027         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    7028     }
    7029     else
    7030     {
    7031         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    7032         if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
    7033             iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
    7034     }
    7035 
    7036 # elif defined(RT_ARCH_ARM64)
    7037     /* mov TMP0, [gstreg] */
    7038     off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
    7039 
    7040     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    7041     /* sub tmp0, tmp0, idxReg */
    7042     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
    7043     /* cbz tmp0, +1 */
    7044     pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    7045     /* brk #0x1000+enmGstReg */
    7046     pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
    7047     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    7048 
    7049 # else
    7050 #  error "Port me!"
    7051 # endif
    7052     return off;
    7053 }
    7054 
    7055 
    7056 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    7057 #  ifdef RT_ARCH_AMD64
    7058 /**
    7059  * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
    7060  */
    7061 DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
    7062 {
    7063     /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
    7064     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    7065     if (idxSimdReg >= 8)
    7066         pbCodeBuf[off++] = X86_OP_REX_R;
    7067     pbCodeBuf[off++] = 0x0f;
    7068     pbCodeBuf[off++] = 0x38;
    7069     pbCodeBuf[off++] = 0x29;
    7070     off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
    7071 
    7072     /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
    7073     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    7074     pbCodeBuf[off++] =   X86_OP_REX_W
    7075                        | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
    7076                        | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
    7077     pbCodeBuf[off++] = 0x0f;
    7078     pbCodeBuf[off++] = 0x3a;
    7079     pbCodeBuf[off++] = 0x16;
    7080     pbCodeBuf[off++] = 0xeb;
    7081     pbCodeBuf[off++] = 0x00;
    7082 
    7083     /* cmp tmp0, 0xffffffffffffffff. */
    7084     pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
    7085     pbCodeBuf[off++] = 0x83;
    7086     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
    7087     pbCodeBuf[off++] = 0xff;
    7088 
    7089     /* je/jz +1 */
    7090     pbCodeBuf[off++] = 0x74;
    7091     pbCodeBuf[off++] = 0x01;
    7092 
    7093     /* int3 */
    7094     pbCodeBuf[off++] = 0xcc;
    7095 
    7096     /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
    7097     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    7098     pbCodeBuf[off++] =   X86_OP_REX_W
    7099                        | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
    7100                        | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
    7101     pbCodeBuf[off++] = 0x0f;
    7102     pbCodeBuf[off++] = 0x3a;
    7103     pbCodeBuf[off++] = 0x16;
    7104     pbCodeBuf[off++] = 0xeb;
    7105     pbCodeBuf[off++] = 0x01;
    7106 
    7107     /* cmp tmp0, 0xffffffffffffffff. */
    7108     pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
    7109     pbCodeBuf[off++] = 0x83;
    7110     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
    7111     pbCodeBuf[off++] = 0xff;
    7112 
    7113     /* je/jz +1 */
    7114     pbCodeBuf[off++] = 0x74;
    7115     pbCodeBuf[off++] = 0x01;
    7116 
    7117     /* int3 */
    7118     pbCodeBuf[off++] = 0xcc;
    7119 
    7120     return off;
    7121 }
    7122 #  endif
    7123 
    7124 
    7125 /**
    7126  * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
    7127  * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
    7128  * instruction if that's not the case.
    7129  *
    7130  * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
    7131  *       Trashes EFLAGS on AMD64.
    7132  */
    7133 DECL_HIDDEN_THROW(uint32_t)
    7134 iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
    7135                                     IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
    7136 {
    7137     /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
    7138     if (   (   enmLoadSz == kIemNativeGstSimdRegLdStSz_256
    7139             && (   IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
    7140                 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
    7141         || (   enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
    7142             && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
    7143         || (   enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
    7144             && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
    7145         return off;
    7146 
    7147 #  ifdef RT_ARCH_AMD64
    7148     if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
    7149     {
    7150         /* movdqa vectmp0, idxSimdReg */
    7151         off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
    7152 
    7153         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
    7154 
    7155         off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
    7156                                                           g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
    7157     }
    7158 
    7159     if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
    7160     {
    7161         /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
    7162         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
    7163 
    7164         /* vextracti128 vectmp0, idxSimdReg, 1 */
    7165         pbCodeBuf[off++] = X86_OP_VEX3;
    7166         pbCodeBuf[off++] =   (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
    7167                            | X86_OP_VEX3_BYTE1_X
    7168                            | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
    7169                            | 0x03; /* Opcode map */
    7170         pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
    7171         pbCodeBuf[off++] = 0x39;
    7172         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
    7173         pbCodeBuf[off++] = 0x01;
    7174 
    7175         off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
    7176                                                           g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
    7177     }
    7178 #  elif defined(RT_ARCH_ARM64)
    7179     /* mov vectmp0, [gstreg] */
    7180     off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
    7181 
    7182     if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
    7183     {
    7184         uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
    7185         /* eor vectmp0, vectmp0, idxSimdReg */
    7186         pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
    7187         /* uaddlv vectmp0, vectmp0.16B */
    7188         pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
    7189         /* umov tmp0, vectmp0.H[0] */
    7190         pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
    7191                                                     0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
    7192         /* cbz tmp0, +1 */
    7193         pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    7194         /* brk #0x1000+enmGstSimdReg */
    7195         pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
    7196     }
    7197 
    7198     if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
    7199     {
    7200         uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
    7201         /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
    7202         pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
    7203         /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
    7204         pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
    7205         /* umov tmp0, (vectmp0 + 1).H[0] */
    7206         pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
    7207                                                     0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
    7208         /* cbz tmp0, +1 */
    7209         pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    7210         /* brk #0x1000+enmGstSimdReg */
    7211         pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
    7212     }
    7213 
    7214 #  else
    7215 #   error "Port me!"
    7216 #  endif
    7217 
    7218     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    7219     return off;
    7220 }
    7221 # endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
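The ARM64 path above uses a common vector comparison idiom: XORing the two register images yields all-zero bytes exactly when they match, and UADDLV folds the 16 bytes into one scalar that is zero only on a full match. Per 128-bit half this is roughly equivalent to the following sketch, with abVec and abCtx as hypothetical stand-ins for the two register images:

    uint32_t uSum = 0;                              /* eor v0, v0, vN; uaddlv h0, v0.16b */
    for (unsigned i = 0; i < 16; i++)
        uSum += (uint8_t)(abVec[i] ^ abCtx[i]);
    if (uSum != 0)                                  /* umov w0, v0.h[0]; cbz w0, +2 */
        RT_BREAKPOINT();                            /* brk #0x1000 + enmGstSimdReg */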
    7222 
    7223 
    7224 /**
    7225  * Emits code that checks that IEMCPU::fExec matches @a fExec for all
    7226  * important bits.
    7227  *
    7228  * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
    7229  *       Trashes EFLAGS on AMD64.
    7230  */
    7231 DECL_HIDDEN_THROW(uint32_t)
    7232 iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
    7233 {
    7234     uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
    7235     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
    7236     off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
    7237     off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
    7238 
    7239 # ifdef RT_ARCH_AMD64
    7240     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    7241 
    7242     /* je/jz +1 */
    7243     pbCodeBuf[off++] = 0x74;
    7244     pbCodeBuf[off++] = 0x01;
    7245 
    7246     /* int3 */
    7247     pbCodeBuf[off++] = 0xcc;
    7248 
    7249 # elif defined(RT_ARCH_ARM64)
    7250     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    7251 
    7252     /* b.eq +1 */
    7253     pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
    7254     /* brk #0x2000 */
    7255     pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
    7256 
    7257 # else
    7258 #  error "Port me!"
    7259 # endif
    7260     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    7261 
    7262     iemNativeRegFreeTmp(pReNative, idxRegTmp);
    7263     return off;
    7264 }
    7265 
    7266 #endif /* VBOX_STRICT */
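Rendered as a hypothetical C check, the mode assertion emitted by iemNativeEmitExecFlagsCheck amounts to this (only the bits covered by IEMTB_F_KEY_MASK participate):

    if (   (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
        != (fExec & IEMTB_F_KEY_MASK))              /* and; cmp; je / b.eq +1 */
        RT_BREAKPOINT();                            /* int3 / brk #0x2000 */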
    7267 
    7268 
    7269 #ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
    7270 /**
    7271  * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
    7272  */
    7273 DECL_HIDDEN_THROW(uint32_t)
    7274 iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
    7275 {
    7276     uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
    7277 
    7278     fEflNeeded &= X86_EFL_STATUS_BITS;
    7279     if (fEflNeeded)
    7280     {
    7281 # ifdef RT_ARCH_AMD64
    7282         /* test dword [pVCpu + offVCpu], imm32 */
    7283         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    7284         if (fEflNeeded <= 0xff)
    7285         {
    7286             pCodeBuf[off++] = 0xf6;
    7287             off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    7288             pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
    7289         }
    7290         else
    7291         {
    7292             pCodeBuf[off++] = 0xf7;
    7293             off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    7294             pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
    7295             pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
    7296             pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
    7297             pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
    7298         }
    7299         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    7300 
    7301 # else
    7302         uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
    7303         off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
    7304         off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
    7305 #  ifdef RT_ARCH_ARM64
    7306         off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
    7307         off = iemNativeEmitBrk(pReNative, off, 0x7777);
    7308 #  else
    7309 #   error "Port me!"
    7310 #  endif
    7311         iemNativeRegFreeTmp(pReNative, idxRegTmp);
    7312 # endif
    7313     }
    7314     return off;
    7315 }
    7316 #endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
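The assertion iemNativeEmitEFlagsSkippingCheck encodes, as a hypothetical C equivalent: none of the status flags the following code depends on may currently be marked as having a skipped EFLAGS update:

    if (pVCpu->iem.s.fSkippingEFlags & fEflNeeded & X86_EFL_STATUS_BITS)
        RT_BREAKPOINT();        /* ARM64: tst; b.eq over; brk #0x7777 */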
    7317 
    7318 
    7319 /**
    7320  * Emits code for checking the return code of a call and rcPassUp, returning
    7321  * from the code if either is non-zero.
    7322  */
    7323 DECL_HIDDEN_THROW(uint32_t)
    7324 iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
    7325 {
    7326 #ifdef RT_ARCH_AMD64
    7327     /*
    7328      * AMD64: eax = call status code.
    7329      */
    7330 
    7331     /* edx = rcPassUp */
    7332     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
    7333 # ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    7334     off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
    7335 # endif
    7336 
    7337     /* edx = eax | rcPassUp */
    7338     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    7339     pbCodeBuf[off++] = 0x0b;                    /* or edx, eax */
    7340     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
    7341     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    7342 
    7343     /* Jump to non-zero status return path. */
    7344     off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
    7345 
    7346     /* done. */
    7347 
    7348 #elif RT_ARCH_ARM64
    7349     /*
    7350      * ARM64: w0 = call status code.
    7351      */
    7352 # ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    7353     off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
    7354 # endif
    7355     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
    7356 
    7357     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    7358 
    7359     pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
    7360 
    7361     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
    7362     iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
    7363     pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
    7364 
    7365 #else
    7366 # error "port me"
    7367 #endif
    7368     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    7369     RT_NOREF_PV(idxInstr);
    7370     return off;
    7371 }
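Semantically the emitted fragment behaves like this hypothetical C, with rcCall standing in for the status code returned by the preceding call:

    if (((uint32_t)rcCall | (uint32_t)pVCpu->iem.s.rcPassUp) != 0) /* or edx, eax / orr w4, w3, w0 */
        goto NonZeroRetOrPassUp;                                   /* jnz / cbnz w4 */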
    7372 
    7373 
    7374 /**
    7375  * Emits code to check if the content of @a idxAddrReg is a canonical address,
    7376  * raising a \#GP(0) if it isn't.
    7377  *
    7378  * @returns New code buffer offset; throws VBox status code on error.
    7379  * @param   pReNative       The native recompile state.
    7380  * @param   off             The code buffer offset.
    7381  * @param   idxAddrReg      The host register with the address to check.
    7382  * @param   idxInstr        The current instruction.
    7383  */
    7384 DECL_HIDDEN_THROW(uint32_t)
    7385 iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
    7386 {
    7387     /*
    7388      * Make sure we don't have any outstanding guest register writes as we may
    7389      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
    7390      */
    7391     off = iemNativeRegFlushPendingWrites(pReNative, off);
    7392 
    7393 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    7394     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    7395 #else
    7396     RT_NOREF(idxInstr);
    7397 #endif
    7398 
    7399 #ifdef RT_ARCH_AMD64
    7400     /*
    7401      * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
    7402      *     return raisexcpt();
    7403      * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
    7404      */
    7405     uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    7406 
    7407     off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
    7408     off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
    7409     off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
    7410     off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
    7411     off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    7412 
    7413     iemNativeRegFreeTmp(pReNative, iTmpReg);
    7414 
    7415 #elif defined(RT_ARCH_ARM64)
    7416     /*
    7417      * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
    7418      *     return raisexcpt();
    7419      * ----
    7420      *     mov     x1, 0x800000000000
    7421      *     add     x1, x0, x1
    7422      *     cmp     xzr, x1, lsr 48
    7423      *     b.ne    .Lraisexcpt
    7424      */
    7425     uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    7426 
    7427     off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
    7428     off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
    7429     off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
    7430     off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    7431 
    7432     iemNativeRegFreeTmp(pReNative, iTmpReg);
    7433 
    7434 #else
    7435 # error "Port me"
    7436 #endif
    7437     return off;
    7438 }
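The AMD64 variant deserves a worked example. A canonical address has bits 63:47 all equal to bit 47, so its high 32 bits lie in either 0x00000000..0x00007fff or 0xffff8000..0xffffffff. Adding 0x8000 with 32-bit wrap-around maps both ranges into 0x00000000..0x0000ffff, so shifting right by 16 yields zero exactly for canonical addresses. As hypothetical C (uAddr standing in for the address being checked):

    uint32_t uHi = (uint32_t)(uAddr >> 32);         /* mov tmp, addr; shr tmp, 32 */
    uHi += UINT32_C(0x8000);                        /* add tmp32, 0x8000 (may wrap) */
    bool const fCanonical = (uHi >> 16) == 0;       /* shr tmp, 16; jnz RaiseGp0 */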
    7439 
    7440 
    7441 /**
    7442  * Emits code to check that the content of @a idxAddrReg is within the limit
    7443  * of CS, raising a \#GP(0) if it isn't.
    7444  *
    7445  * @returns New code buffer offset; throws VBox status code on error.
    7446  * @param   pReNative       The native recompile state.
    7447  * @param   off             The code buffer offset.
    7448  * @param   idxAddrReg      The host register (32-bit) with the address to
    7449  *                          check.
    7450  * @param   idxInstr        The current instruction.
    7451  */
    7452 DECL_HIDDEN_THROW(uint32_t)
    7453 iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    7454                                                       uint8_t idxAddrReg, uint8_t idxInstr)
    7455 {
    7456     /*
    7457      * Make sure we don't have any outstanding guest register writes as we may
    7458      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
    7459      */
    7460     off = iemNativeRegFlushPendingWrites(pReNative, off);
    7461 
    7462 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    7463     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    7464 #else
    7465     RT_NOREF(idxInstr);
    7466 #endif
    7467 
    7468     uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
    7469                                                                 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
    7470                                                                 kIemNativeGstRegUse_ReadOnly);
    7471 
    7472     off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
    7473     off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    7474 
    7475     iemNativeRegFreeTmp(pReNative, idxRegCsLim);
    7476     return off;
    7477 }
    7478 
    7479 
    7480 /**
    7481  * Emits a call to a CImpl function or something similar.
    7482  */
    7483 DECL_HIDDEN_THROW(uint32_t)
    7484 iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
    7485                        uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
    7486 {
    7487     /* Writeback everything. */
    7488     off = iemNativeRegFlushPendingWrites(pReNative, off);
    7489 
    7490     /*
    7491      * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
    7492      * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
    7493      */
    7494     fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
    7495                                                              fGstShwFlush
    7496                                                              | RT_BIT_64(kIemNativeGstReg_Pc)
    7497                                                              | RT_BIT_64(kIemNativeGstReg_EFlags));
    7498     iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
    7499 
    7500     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
    7501 
    7502     /*
    7503      * Load the parameters.
    7504      */
    7505 #if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
    7506     /* Special handling for the hidden VBOXSTRICTRC return pointer. */
    7507     off = iemNativeEmitLoadGprFromGpr(  pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    7508     off = iemNativeEmitLoadGprImm64(    pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
    7509     if (cAddParams > 0)
    7510         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
    7511     if (cAddParams > 1)
    7512         off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
    7513     if (cAddParams > 2)
    7514         off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
    7515     off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
    7516 
    7517 #else
    7518     AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
    7519     off = iemNativeEmitLoadGprFromGpr(  pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    7520     off = iemNativeEmitLoadGprImm64(    pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
    7521     if (cAddParams > 0)
    7522         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
    7523     if (cAddParams > 1)
    7524         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
    7525     if (cAddParams > 2)
    7526 # if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
    7527         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
    7528 # else
    7529         off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
    7530 # endif
    7531 #endif
    7532 
    7533     /*
    7534      * Make the call.
    7535      */
    7536     off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
    7537 
    7538 #if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
    7539     off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
    7540 #endif
    7541 
    7542     /*
    7543      * Check the status code.
    7544      */
    7545     return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
    7546 }
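A note on the Windows/AMD64 strict-rc path above: MSC turns a by-value VBOXSTRICTRC return into a hidden pointer parameter, which is why the code reserves a shadow stack slot, passes its address as the first argument and reloads eax from it after the call. As a hypothetical C rendering:

    VBOXSTRICTRC rcStrict;                          /* lives at IEMNATIVE_FP_OFF_IN_SHADOW_ARG0 */
    pfnCImpl(&rcStrict, pVCpu, cbInstr, uParam0);   /* rcx = &rcStrict, rdx = pVCpu, ... */
    int32_t const rc = VBOXSTRICTRC_VAL(rcStrict);  /* mov eax, [rbp + IEMNATIVE_FP_OFF_IN_SHADOW_ARG0] */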
    7547 
    7548 
    7549 /**
    7550  * Emits a call to a threaded worker function.
    7551  */
    7552 DECL_HIDDEN_THROW(uint32_t)
    7553 iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
    7554 {
    7555     IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
    7556 
    7557     /* We don't know what the threaded function is doing so we must flush all pending writes. */
    7558     off = iemNativeRegFlushPendingWrites(pReNative, off);
    7559 
    7560     iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
    7561     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
    7562 
    7563 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    7564     /* The threaded function may throw / long jmp, so set current instruction
    7565        number if we're counting. */
    7566     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    7567 #endif
    7568 
    7569     uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
    7570 
    7571 #ifdef RT_ARCH_AMD64
    7572     /* Load the parameters and emit the call. */
    7573 # ifdef RT_OS_WINDOWS
    7574 #  ifndef VBOXSTRICTRC_STRICT_ENABLED
    7575     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
    7576     if (cParams > 0)
    7577         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
    7578     if (cParams > 1)
    7579         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
    7580     if (cParams > 2)
    7581         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
    7582 #  else  /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
    7583     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
    7584     if (cParams > 0)
    7585         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
    7586     if (cParams > 1)
    7587         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
    7588     if (cParams > 2)
    7589     {
    7590         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
    7591         off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
    7592     }
    7593     off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
    7594 #  endif /* VBOXSTRICTRC_STRICT_ENABLED */
    7595 # else
    7596     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
    7597     if (cParams > 0)
    7598         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
    7599     if (cParams > 1)
    7600         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
    7601     if (cParams > 2)
    7602         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
    7603 # endif
    7604 
    7605     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
    7606 
    7607 # if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
    7608     off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
    7609 # endif
    7610 
    7611 #elif RT_ARCH_ARM64
    7612     /*
    7613      * ARM64:
    7614      */
    7615     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    7616     if (cParams > 0)
    7617         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
    7618     if (cParams > 1)
    7619         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
    7620     if (cParams > 2)
    7621         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
    7622 
    7623     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
    7624 
    7625 #else
    7626 # error "port me"
    7627 #endif
    7628 
    7629     /*
    7630      * Check the status code.
    7631      */
    7632     off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
    7633 
    7634     return off;
    7635 }
    7636 
    7637 #ifdef VBOX_WITH_STATISTICS
    7638 /**
    7639  * Emits code to update the threaded call statistics.
    7640  */
    7641 DECL_INLINE_THROW(uint32_t)
    7642 iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
    7643 {
    7644     /*
    7645      * Update threaded function stats.
    7646      */
    7647     uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
    7648     AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
    7649 # if defined(RT_ARCH_ARM64)
    7650     uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
    7651     uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
    7652     off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
    7653     iemNativeRegFreeTmp(pReNative, idxTmp1);
    7654     iemNativeRegFreeTmp(pReNative, idxTmp2);
    7655 # else
    7656     off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
    7657 # endif
    7658     return off;
    7659 }
    7660 #endif /* VBOX_WITH_STATISTICS */
    7661 
    7662 
    7663 /**
    7664  * Emits the code at the ReturnWithFlags label (returns
    7665  * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
    7666  */
    7667 static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    7668 {
    7669     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
    7670     if (idxLabel != UINT32_MAX)
    7671     {
    7672         iemNativeLabelDefine(pReNative, idxLabel, off);
    7673 
    7674         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
    7675 
    7676         /* jump back to the return sequence. */
    7677         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    7678     }
    7679     return off;
    7680 }
    7681 
    7682 
    7683 /**
    7684  * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
    7685  */
    7686 static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    7687 {
    7688     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
    7689     if (idxLabel != UINT32_MAX)
    7690     {
    7691         iemNativeLabelDefine(pReNative, idxLabel, off);
    7692 
    7693         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
    7694 
    7695         /* jump back to the return sequence. */
    7696         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    7697     }
    7698     return off;
    7699 }
    7700 
    7701 
    7702 /**
    7703  * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
    7704  */
    7705 static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    7706 {
    7707     /*
    7708      * Generate the rc + rcPassUp fiddling code if needed.
    7709      */
    7710     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
    7711     if (idxLabel != UINT32_MAX)
    7712     {
    7713         iemNativeLabelDefine(pReNative, idxLabel, off);
    7714 
    7715         /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
    7716 #ifdef RT_ARCH_AMD64
    7717 # ifdef RT_OS_WINDOWS
    7718 #  ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    7719         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8,  X86_GREG_xCX); /* cl = instruction number */
    7720 #  endif
    7721         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
    7722         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
    7723 # else
    7724         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
    7725         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
    7726 #  ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    7727         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
    7728 #  endif
    7729 # endif
    7730 # ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    7731         off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
    7732 # endif
    7733 
    7734 #else
    7735         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
    7736         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    7737         /* IEMNATIVE_CALL_ARG2_GREG is already set. */
    7738 #endif
    7739 
    7740         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
    7741         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    7742     }
    7743     return off;
    7744 }
    7745 
    7746 
    7747 /**
    7748  * Emits a standard epilog.
    7749  */
    7750 static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
    7751 {
    7752     *pidxReturnLabel = UINT32_MAX;
    7753 
    7754     /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
    7755     off = iemNativeRegFlushPendingWrites(pReNative, off);
    7756 
    7757     /*
    7758      * Successful return, so clear the return register (eax, w0).
    7759      */
    7760     off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
    7761 
    7762     /*
    7763      * Define label for common return point.
    7764      */
    7765     uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
    7766     *pidxReturnLabel = idxReturn;
    7767 
    7768     IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
    7769 
    7770     /*
    7771      * Restore registers and return.
    7772      */
    7773 #ifdef RT_ARCH_AMD64
    7774     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
    7775 
    7776     /* Reposition esp at the r15 restore point. */
    7777     pbCodeBuf[off++] = X86_OP_REX_W;
    7778     pbCodeBuf[off++] = 0x8d;                    /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
    7779     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
    7780     pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
    7781 
    7782     /* Pop non-volatile registers and return */
    7783     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r15 */
    7784     pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
    7785     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r14 */
    7786     pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
    7787     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r13 */
    7788     pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
    7789     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r12 */
    7790     pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
    7791 # ifdef RT_OS_WINDOWS
    7792     pbCodeBuf[off++] = 0x58 + X86_GREG_xDI;     /* pop rdi */
    7793     pbCodeBuf[off++] = 0x58 + X86_GREG_xSI;     /* pop rsi */
    7794 # endif
    7795     pbCodeBuf[off++] = 0x58 + X86_GREG_xBX;     /* pop rbx */
    7796     pbCodeBuf[off++] = 0xc9;                    /* leave */
    7797     pbCodeBuf[off++] = 0xc3;                    /* ret */
    7798     pbCodeBuf[off++] = 0xcc;                    /* int3 poison */
    7799 
    7800 #elif RT_ARCH_ARM64
    7801     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    7802 
    7803     /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
    7804     AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
    7805     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
    7806                                                  ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
    7807                                                  IEMNATIVE_FRAME_VAR_SIZE / 8);
    7808     /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
    7809     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7810                                                  ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
    7811     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7812                                                  ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
    7813     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7814                                                  ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
    7815     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7816                                                  ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
    7817     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7818                                                  ARMV8_A64_REG_BP,  ARMV8_A64_REG_LR,  ARMV8_A64_REG_SP, 10);
    7819     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    7820 
    7821     /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ;  */
    7822     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
    7823     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
    7824                                                      IEMNATIVE_FRAME_SAVE_REG_SIZE);
    7825 
    7826     /* retab / ret */
    7827 # ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
    7828     if (1)
    7829         pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
    7830     else
    7831 # endif
    7832         pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
    7833 
    7834 #else
    7835 # error "port me"
    7836 #endif
    7837     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    7838 
    7839     return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
    7840 }
    7841 
    7842 
    7843 /**
    7844  * Emits a standard prolog.
    7845  */
    7846 static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    7847 {
    7848 #ifdef RT_ARCH_AMD64
    7849     /*
    7850      * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
    7851      * reserving 64 bytes for stack variables plus 4 non-register argument
    7852      * slots.  Fixed register assignment: xBX = pVCpu;
    7853      *
    7854      * Since we always do the same register spilling, we can use the same
    7855      * unwind description for all the code.
    7856      */
    7857     uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    7858     pbCodeBuf[off++] = 0x50 + X86_GREG_xBP;     /* push rbp */
    7859     pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbp, rsp */
    7860     pbCodeBuf[off++] = 0x8b;
    7861     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
    7862     pbCodeBuf[off++] = 0x50 + X86_GREG_xBX;     /* push rbx */
    7863     AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
    7864 # ifdef RT_OS_WINDOWS
    7865     pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbx, rcx ; RBX = pVCpu */
    7866     pbCodeBuf[off++] = 0x8b;
    7867     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
    7868     pbCodeBuf[off++] = 0x50 + X86_GREG_xSI;     /* push rsi */
    7869     pbCodeBuf[off++] = 0x50 + X86_GREG_xDI;     /* push rdi */
    7870 # else
    7871     pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbx, rdi ; RBX = pVCpu */
    7872     pbCodeBuf[off++] = 0x8b;
    7873     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
    7874 # endif
    7875     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r12 */
    7876     pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
    7877     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r13 */
    7878     pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
    7879     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r14 */
    7880     pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
    7881     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r15 */
    7882     pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
    7883 
    7884 # ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
    7885     /* Save the frame pointer. */
    7886     off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
    7887 # endif
    7888 
    7889     off = iemNativeEmitSubGprImm(pReNative, off,    /* sub rsp, byte 28h */
    7890                                  X86_GREG_xSP,
    7891                                    IEMNATIVE_FRAME_ALIGN_SIZE
    7892                                  + IEMNATIVE_FRAME_VAR_SIZE
    7893                                  + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
    7894                                  + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
    7895     AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
    7896     AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
    7897     AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
    7898 
    7899 #elif RT_ARCH_ARM64
    7900     /*
    7901      * We set up a stack frame exactly like on x86, only we have to push the
    7902      * return address ourselves here.  We save all non-volatile registers.
    7903      */
    7904     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
    7905 
    7906 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further as we've been
    7907                       * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.  It's
    7908                       * definitely the dwarf stepping code, but till it's found it's very tedious to figure out whether it's
    7909                       * in any way conditional, so just emit these instructions now and hope for the best... */
    7910     /* pacibsp */
    7911     pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
    7912 # endif
    7913 
    7914     /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
    7915     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
    7916     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
    7917                                                  ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
    7918                                                  -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
    7919     /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
    7920     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7921                                                  ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
    7922     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7923                                                  ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
    7924     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7925                                                  ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
    7926     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7927                                                  ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
    7928     /* Save the BP and LR (ret address) registers at the top of the frame. */
    7929     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    7930                                                  ARMV8_A64_REG_BP,  ARMV8_A64_REG_LR,  ARMV8_A64_REG_SP, 10);
    7931     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    7932     /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
    7933     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
    7934                                                      ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
    7935 
    7936     /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ;  Allocate the variable area from SP. */
    7937     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
    7938 
    7939     /* mov r28, r0  */
    7940     off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
    7941     /* mov r27, r1  */
    7942     off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
    7943 
    7944 # ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
    7945     /* Save the frame pointer. */
    7946     off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
    7947                                            ARMV8_A64_REG_X2);
    7948 # endif
    7949 
    7950 #else
    7951 # error "port me"
    7952 #endif
    7953     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    7954     return off;
    7955 }
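For orientation, a sketch of the resulting ARM64 frame (the offsets follow from the IEMNATIVE_FRAME_* values asserted on above; addresses grow downwards):

    /*  caller's frame                      <- old SP = BP + 16
     *  saved BP, saved LR                  <- BP (points at the saved BP)
     *  saved x27, x28
     *  saved x25, x26
     *  saved x23, x24
     *  saved x21, x22
     *  saved x19, x20                      <- SP + IEMNATIVE_FRAME_VAR_SIZE
     *  variable area                       <- SP
     */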
    7956 
    7957 
    7958 /*********************************************************************************************************************************
    7959 *   Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++                                                            *
    7960 *********************************************************************************************************************************/
    7961 
    7962 /**
    7963  * Internal worker that allocates a variable with kind set to
    7964  * kIemNativeVarKind_Invalid and no current stack allocation.
    7965  *
    7966  * The kind will either be set by the caller or later when the variable is first
    7967  * assigned a value.
    7968  *
    7969  * @returns Unpacked index.
    7970  * @internal
    7971  */
    7972 static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
    7973 {
    7974     Assert(cbType > 0 && cbType <= 64);
    7975     unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
    7976     AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
    7977     pReNative->Core.bmVars |= RT_BIT_32(idxVar);
    7978     pReNative->Core.aVars[idxVar].enmKind        = kIemNativeVarKind_Invalid;
    7979     pReNative->Core.aVars[idxVar].cbVar          = cbType;
    7980     pReNative->Core.aVars[idxVar].idxStackSlot   = UINT8_MAX;
    7981     pReNative->Core.aVars[idxVar].idxReg         = UINT8_MAX;
    7982     pReNative->Core.aVars[idxVar].uArgNo         = UINT8_MAX;
    7983     pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
    7984     pReNative->Core.aVars[idxVar].enmGstReg      = kIemNativeGstReg_End;
    7985     pReNative->Core.aVars[idxVar].fRegAcquired   = false;
    7986     pReNative->Core.aVars[idxVar].u.uValue       = 0;
    7987 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    7988     pReNative->Core.aVars[idxVar].fSimdReg       = false;
    7989 #endif
    7990     return idxVar;
    7991 }
    7992 
    7993 
    7994 /**
    7995  * Internal worker that allocates an argument variable w/o setting enmKind.
    7996  *
    7997  * @returns Unpacked index.
    7998  * @internal
    7999  */
    8000 static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
    8001 {
    8002     iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
    8003     AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
    8004     AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
    8005 
    8006     uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
    8007     pReNative->Core.aidxArgVars[iArgNo]  = idxVar; /* (unpacked) */
    8008     pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
    8009     return idxVar;
    8010 }
    8011 
    8012 
    8013 /**
    8014  * Gets the stack slot for a stack variable, allocating one if necessary.
    8015  *
    8016  * Calling this function implies that the stack slot will contain a valid
    8017  * variable value.  The caller deals with any register currently assigned to the
    8018  * variable, typically by spilling it into the stack slot.
    8019  *
    8020  * @returns The stack slot number.
    8021  * @param   pReNative   The recompiler state.
    8022  * @param   idxVar      The variable.
    8023  * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS
    8024  */
    8025 DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    8026 {
    8027     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8028     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    8029     Assert(pVar->enmKind == kIemNativeVarKind_Stack);
    8030 
    8031     /* Already got a slot? */
    8032     uint8_t const idxStackSlot = pVar->idxStackSlot;
    8033     if (idxStackSlot != UINT8_MAX)
    8034     {
    8035         Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
    8036         return idxStackSlot;
    8037     }
    8038 
    8039     /*
    8040      * A single slot is easy to allocate.
    8041      * Allocate them from the top end, closest to BP, to reduce the displacement.
    8042      */
    8043     if (pVar->cbVar <= sizeof(uint64_t))
    8044     {
    8045         unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
    8046         AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    8047         pReNative->Core.bmStack |= RT_BIT_32(iSlot);
    8048         pVar->idxStackSlot       = (uint8_t)iSlot;
    8049         Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
    8050         return (uint8_t)iSlot;
    8051     }
    8052 
    8053     /*
    8054      * We need more than one stack slot.
    8055      *
    8056      * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
    8057      */
    8058     AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
    8059     Assert(pVar->cbVar <= 64);
    8060     uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
    8061     uint32_t       fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
    8062     uint32_t       bmStack       = pReNative->Core.bmStack;
    8063     while (bmStack != UINT32_MAX)
    8064     {
    8065         unsigned iSlot = ASMBitLastSetU32(~bmStack);
    8066         AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    8067         iSlot = (iSlot - 1) & ~fBitAlignMask;
    8068         if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
    8069         {
    8070             pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
    8071             pVar->idxStackSlot       = (uint8_t)iSlot;
    8072             Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
    8073                    idxVar, iSlot, fBitAllocMask, pVar->cbVar));
    8074             return (uint8_t)iSlot;
    8075         }
    8076 
    8077         bmStack |= (fBitAllocMask << iSlot);
    8078     }
    8079     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    8080 }
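To make the mask arithmetic concrete, here is a worked example for a hypothetical 32-byte variable (slots are 8 bytes, so four slots on a four-slot boundary are needed):

    uint8_t const  cbVar         = 32;                                          /* hypothetical */
    uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(cbVar) - 4) - 1;  /* = 0x3 */
    uint32_t const fBitAllocMask = RT_BIT_32((cbVar + 7) >> 3) - 1;             /* = 0xf */
    /* Each candidate is rounded down to the alignment and claimed only if all
       four slot bits are still clear in bmStack: */
    unsigned const iSlot = (ASMBitLastSetU32(~bmStack) - 1) & ~fBitAlignMask;
    bool const     fFree = (bmStack & (fBitAllocMask << iSlot)) == 0;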
    8081 
    8082 
    8083 /**
    8084  * Changes the variable to a stack variable.
    8085  *
    8086  * Currently this is only possible the first time the variable is used;
    8087  * switching later can be implemented but hasn't been done.
    8088  *
    8089  * @param   pReNative   The recompiler state.
    8090  * @param   idxVar      The variable.
    8091  * @throws  VERR_IEM_VAR_IPE_2
    8092  */
    8093 DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    8094 {
    8095     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8096     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    8097     if (pVar->enmKind != kIemNativeVarKind_Stack)
    8098     {
    8099         /* We could in theory transition from immediate to stack as well, but it
    8100            would involve the caller doing work storing the value on the stack. So,
    8101            till that's required we only allow transition from invalid. */
    8102         AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    8103         AssertStmt(pVar->idxReg  == UINT8_MAX,                 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    8104         pVar->enmKind = kIemNativeVarKind_Stack;
    8105 
    8106         /* Note! We don't allocate a stack slot here, that's only done when a
    8107                  slot is actually needed to hold a variable value. */
    8108     }
    8109 }
    8110 
    8111 
    8112 /**
     8113  * Sets the variable to a constant value.
    8114  *
    8115  * This does not require stack storage as we know the value and can always
    8116  * reload it, unless of course it's referenced.
    8117  *
    8118  * @param   pReNative   The recompiler state.
    8119  * @param   idxVar      The variable.
    8120  * @param   uValue      The immediate value.
    8121  * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
    8122  */
    8123 DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
    8124 {
    8125     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8126     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    8127     if (pVar->enmKind != kIemNativeVarKind_Immediate)
    8128     {
    8129         /* Only simple transitions for now. */
    8130         AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    8131         pVar->enmKind = kIemNativeVarKind_Immediate;
    8132     }
    8133     AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    8134 
    8135     pVar->u.uValue = uValue;
    8136     AssertMsg(   pVar->cbVar >= sizeof(uint64_t)
    8137               || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
    8138               ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
    8139 }
    8140 
    8141 
    8142 /**
    8143  * Sets the variable to a reference (pointer) to @a idxOtherVar.
    8144  *
    8145  * This does not require stack storage as we know the value and can always
    8146  * reload it.  Loading is postponed till needed.
    8147  *
    8148  * @param   pReNative   The recompiler state.
    8149  * @param   idxVar      The variable. Unpacked.
    8150  * @param   idxOtherVar The variable to take the (stack) address of. Unpacked.
    8151  *
    8152  * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
    8153  * @internal
    8154  */
    8155 static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
    8156 {
    8157     Assert(idxVar      < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
    8158     Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
    8159 
    8160     if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
    8161     {
    8162         /* Only simple transitions for now. */
    8163         AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
    8164                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    8165         pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
    8166     }
    8167     AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    8168 
    8169     pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
    8170 
    8171     /* Update the other variable, ensure it's a stack variable. */
    8172     /** @todo handle variables with const values... that'll go boom now. */
    8173     pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
    8174     iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
    8175 }
    8176 
    8177 
    8178 /**
    8179  * Sets the variable to a reference (pointer) to a guest register reference.
    8180  *
    8181  * This does not require stack storage as we know the value and can always
    8182  * reload it.  Loading is postponed till needed.
    8183  *
    8184  * @param   pReNative       The recompiler state.
    8185  * @param   idxVar          The variable.
     8186  * @param   enmRegClass     The class of guest registers to reference.
    8187  * @param   idxReg          The register within @a enmRegClass to reference.
    8188  *
    8189  * @throws  VERR_IEM_VAR_IPE_2
    8190  */
    8191 DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
    8192                                                        IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
    8193 {
    8194     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8195     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    8196 
    8197     if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
    8198     {
    8199         /* Only simple transitions for now. */
    8200         AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    8201         pVar->enmKind = kIemNativeVarKind_GstRegRef;
    8202     }
    8203     AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    8204 
    8205     pVar->u.GstRegRef.enmClass = enmRegClass;
    8206     pVar->u.GstRegRef.idx      = idxReg;
    8207 }
    8208 
    8209 
    8210 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
    8211 {
    8212     return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
    8213 }
    8214 
    8215 
    8216 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
    8217 {
    8218     uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
    8219 
     8220     /* Since we're using a generic uint64_t value type, we must truncate it if
     8221        the variable is smaller, otherwise we may end up with a too large value
     8222        when scaling up an imm8 w/ sign-extension.
     8223 
     8224        This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
     8225        in the BIOS, bx=1) when running on arm, because clang expects 16-bit
     8226        register parameters to have bits 16 and up set to zero.  Instead of
     8227        setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
     8228        CF value in the result.  */
    8229     switch (cbType)
    8230     {
    8231         case sizeof(uint8_t):   uValue &= UINT64_C(0xff); break;
    8232         case sizeof(uint16_t):  uValue &= UINT64_C(0xffff); break;
    8233         case sizeof(uint32_t):  uValue &= UINT64_C(0xffffffff); break;
    8234     }
    8235     iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
    8236     return idxVar;
    8237 }
    8238 
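/*
 * Illustrative sketch (not part of the original source): the effect of the
 * masking above on a sign-extended 16-bit immediate.  Without it the helper
 * would receive a value with bits 16 thru 63 set, breaking the zero-extension
 * assumption clang makes for narrow register parameters on arm64.
 */
#if 0 /* example only */
# include <stdint.h>
static uint64_t const g_uRawImm    = (uint64_t)(int64_t)(int16_t)0xffff;            /* 0xffffffffffffffff */
static uint64_t const g_uMaskedImm = ((uint64_t)(int64_t)(int16_t)0xffff) & 0xffff; /* 0x000000000000ffff */
#endif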
    8239 
    8240 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
    8241 {
    8242     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
    8243     idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
    8244     AssertStmt(   idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
    8245                && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
    8246                && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
    8247                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
    8248 
    8249     uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
    8250     iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
    8251     return idxArgVar;
    8252 }
    8253 
    8254 
    8255 DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
    8256 {
    8257     uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
     8258     /* Don't set to stack now; leave that to the first use, as for instance
     8259        IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
    8260     return idxVar;
    8261 }
    8262 
    8263 
    8264 DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
    8265 {
    8266     uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
    8267 
     8268     /* Since we're using a generic uint64_t value type, we must truncate it if
     8269        the variable is smaller, otherwise we may end up with a too large value
     8270        when scaling up an imm8 w/ sign-extension. */
    8271     switch (cbType)
    8272     {
    8273         case sizeof(uint8_t):   uValue &= UINT64_C(0xff); break;
    8274         case sizeof(uint16_t):  uValue &= UINT64_C(0xffff); break;
    8275         case sizeof(uint32_t):  uValue &= UINT64_C(0xffffffff); break;
    8276     }
    8277     iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
    8278     return idxVar;
    8279 }
    8280 
    8281 
    8282 DECL_HIDDEN_THROW(uint8_t)  iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
    8283 {
    8284     uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
    8285     iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
    8286 
    8287     uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
    8288     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
    8289 
    8290     *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
    8291 
     8292     /* Truncate the value to this variable's size. */
    8293     switch (cbType)
    8294     {
    8295         case sizeof(uint8_t):   *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
    8296         case sizeof(uint16_t):  *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
    8297         case sizeof(uint32_t):  *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
    8298     }
    8299 
    8300     iemNativeVarRegisterRelease(pReNative, idxVarOther);
    8301     iemNativeVarRegisterRelease(pReNative, idxVar);
    8302     return idxVar;
    8303 }
    8304 
    8305 
    8306 /**
    8307  * Makes sure variable @a idxVar has a register assigned to it and that it stays
    8308  * fixed till we call iemNativeVarRegisterRelease.
    8309  *
    8310  * @returns The host register number.
    8311  * @param   pReNative   The recompiler state.
    8312  * @param   idxVar      The variable.
    8313  * @param   poff        Pointer to the instruction buffer offset.
    8314  *                      In case a register needs to be freed up or the value
    8315  *                      loaded off the stack.
     8316  * @param   fInitialized  Set if the variable must already have been initialized.
     8317  *                        Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
     8318  *                        the case.
     8319  * @param   idxRegPref    Preferred register number or UINT8_MAX.
    8320  */
    8321 DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
    8322                                                        bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
    8323 {
    8324     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8325     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    8326     Assert(pVar->cbVar <= 8);
    8327     Assert(!pVar->fRegAcquired);
    8328 
    8329     uint8_t idxReg = pVar->idxReg;
    8330     if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8331     {
    8332         Assert(   pVar->enmKind > kIemNativeVarKind_Invalid
    8333                && pVar->enmKind < kIemNativeVarKind_End);
    8334         pVar->fRegAcquired = true;
    8335         return idxReg;
    8336     }
    8337 
    8338     /*
    8339      * If the kind of variable has not yet been set, default to 'stack'.
    8340      */
    8341     Assert(   pVar->enmKind >= kIemNativeVarKind_Invalid
    8342            && pVar->enmKind < kIemNativeVarKind_End);
    8343     if (pVar->enmKind == kIemNativeVarKind_Invalid)
    8344         iemNativeVarSetKindToStack(pReNative, idxVar);
    8345 
    8346     /*
     8347      * We have to allocate a register for the variable, even if it's a stack
     8348      * one, as we don't know if there are modifications being made to it
     8349      * before it's finalized (todo: analyze and insert hints about that?).
     8350      *
     8351      * If we can, we try to get the correct register for argument variables.
     8352      * This assumes that most argument variables are fetched as close as
     8353      * possible to the actual call, so that there aren't any interfering
     8354      * hidden calls (memory accesses, etc) in between.
     8355      *
     8356      * If we cannot, or it's a regular variable, we make sure no argument
     8357      * registers that will be used by this MC block will be allocated here,
     8358      * and we always prefer non-volatile registers to avoid needing to spill
     8359      * stuff for internal calls.
    8360      */
    8361     /** @todo Detect too early argument value fetches and warn about hidden
    8362      * calls causing less optimal code to be generated in the python script. */
    8363 
    8364     uint8_t const uArgNo = pVar->uArgNo;
    8365     if (   uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
    8366         && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
    8367     {
    8368         idxReg = g_aidxIemNativeCallRegs[uArgNo];
    8369 
    8370 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    8371         /* Writeback any dirty shadow registers we are about to unshadow. */
    8372         *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
    8373 #endif
    8374 
    8375         iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    8376         Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
    8377     }
    8378     else if (   idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
    8379              || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
    8380     {
     8381         /** @todo there must be a better way of doing this, and cArgsX to boot? */
    8382         uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
    8383         uint32_t const fRegs        = ~pReNative->Core.bmHstRegs
    8384                                     & ~pReNative->Core.bmHstRegsWithGstShadow
    8385                                     & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
    8386                                     & fNotArgsMask;
    8387         if (fRegs)
    8388         {
    8389             /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
    8390             idxReg = (uint8_t)ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    8391                                                ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    8392             Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    8393             Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    8394             Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
    8395         }
    8396         else
    8397         {
    8398             idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
    8399                                                IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
    8400             AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
    8401             Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
    8402         }
    8403     }
    8404     else
    8405     {
    8406         idxReg = idxRegPref;
    8407         iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    8408         Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
    8409     }
    8410     iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
    8411     pVar->idxReg = idxReg;
    8412 
    8413 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    8414     pVar->fSimdReg = false;
    8415 #endif
    8416 
    8417     /*
    8418      * Load it off the stack if we've got a stack slot.
    8419      */
    8420     uint8_t const idxStackSlot = pVar->idxStackSlot;
    8421     if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
    8422     {
    8423         Assert(fInitialized);
    8424         int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
    8425         switch (pVar->cbVar)
    8426         {
    8427             case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
    8428             case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
    8429             case 3: AssertFailed(); RT_FALL_THRU();
    8430             case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
    8431             default: AssertFailed(); RT_FALL_THRU();
    8432             case 8: *poff = iemNativeEmitLoadGprByBp(   pReNative, *poff, idxReg, offDispBp); break;
    8433         }
    8434     }
    8435     else
    8436     {
    8437         Assert(idxStackSlot == UINT8_MAX);
    8438         AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    8439     }
    8440     pVar->fRegAcquired = true;
    8441     return idxReg;
    8442 }
    8443 
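/*
 * Illustrative sketch (not part of the original source): the typical
 * acquire/emit/release pattern around iemNativeVarRegisterAcquire, modelled
 * on what iemNativeVarAllocAssign above does internally.  The wrapper
 * function itself is hypothetical.
 */
#if 0 /* example only */
static uint32_t exampleEmitCopyVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
{
    /* Acquiring pins a host register to each variable until it is released. */
    uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
    uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegDst, idxRegSrc);
    /* Release promptly so the allocator can reuse the registers. */
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeVarRegisterRelease(pReNative, idxVarDst);
    return off;
}
#endif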
    8444 
    8445 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    8446 /**
    8447  * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
    8448  * fixed till we call iemNativeVarRegisterRelease.
    8449  *
    8450  * @returns The host register number.
    8451  * @param   pReNative   The recompiler state.
    8452  * @param   idxVar      The variable.
    8453  * @param   poff        Pointer to the instruction buffer offset.
    8454  *                      In case a register needs to be freed up or the value
    8455  *                      loaded off the stack.
     8456  * @param   fInitialized  Set if the variable must already have been initialized.
     8457  *                        Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
     8458  *                        the case.
     8459  * @param   idxRegPref    Preferred SIMD register number or UINT8_MAX.
    8460  */
    8461 DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
    8462                                                            bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
    8463 {
    8464     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8465     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    8466     Assert(   pVar->cbVar == sizeof(RTUINT128U)
    8467            || pVar->cbVar == sizeof(RTUINT256U));
    8468     Assert(!pVar->fRegAcquired);
    8469 
    8470     uint8_t idxReg = pVar->idxReg;
    8471     if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
    8472     {
    8473         Assert(   pVar->enmKind > kIemNativeVarKind_Invalid
    8474                && pVar->enmKind < kIemNativeVarKind_End);
    8475         pVar->fRegAcquired = true;
    8476         return idxReg;
    8477     }
    8478 
    8479     /*
    8480      * If the kind of variable has not yet been set, default to 'stack'.
    8481      */
    8482     Assert(   pVar->enmKind >= kIemNativeVarKind_Invalid
    8483            && pVar->enmKind < kIemNativeVarKind_End);
    8484     if (pVar->enmKind == kIemNativeVarKind_Invalid)
    8485         iemNativeVarSetKindToStack(pReNative, idxVar);
    8486 
    8487     /*
     8488      * We have to allocate a register for the variable, even if it's a stack
     8489      * one, as we don't know if there are modifications being made to it
     8490      * before it's finalized (todo: analyze and insert hints about that?).
     8491      *
     8492      * If we can, we try to get the correct register for argument variables.
     8493      * This assumes that most argument variables are fetched as close as
     8494      * possible to the actual call, so that there aren't any interfering
     8495      * hidden calls (memory accesses, etc) in between.
     8496      *
     8497      * If we cannot, or it's a regular variable, we make sure no argument
     8498      * registers that will be used by this MC block will be allocated here,
     8499      * and we always prefer non-volatile registers to avoid needing to spill
     8500      * stuff for internal calls.
    8501      */
    8502     /** @todo Detect too early argument value fetches and warn about hidden
    8503      * calls causing less optimal code to be generated in the python script. */
    8504 
    8505     uint8_t const uArgNo = pVar->uArgNo;
    8506     Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
    8507 
     8508     /* SIMD is a bit simpler for now because there is no support for arguments. */
    8509     if (   idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
    8510         || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
    8511     {
    8512         uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
    8513         uint32_t const fRegs        = ~pReNative->Core.bmHstSimdRegs
    8514                                     & ~pReNative->Core.bmHstSimdRegsWithGstShadow
    8515                                     & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
    8516                                     & fNotArgsMask;
    8517         if (fRegs)
    8518         {
    8519             idxReg = (uint8_t)ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
    8520                                                ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
    8521             Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
    8522             Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
    8523             Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
    8524         }
    8525         else
    8526         {
    8527             idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
    8528                                                    IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
    8529             AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
    8530             Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
    8531         }
    8532     }
    8533     else
    8534     {
    8535         idxReg = idxRegPref;
    8536         AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    8537         Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
    8538     }
    8539     iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
    8540 
    8541     pVar->fSimdReg = true;
    8542     pVar->idxReg = idxReg;
    8543 
    8544     /*
    8545      * Load it off the stack if we've got a stack slot.
    8546      */
    8547     uint8_t const idxStackSlot = pVar->idxStackSlot;
    8548     if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
    8549     {
    8550         Assert(fInitialized);
    8551         int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
    8552         switch (pVar->cbVar)
    8553         {
    8554             case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
    8555             default: AssertFailed(); RT_FALL_THRU();
    8556             case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
    8557         }
    8558     }
    8559     else
    8560     {
    8561         Assert(idxStackSlot == UINT8_MAX);
    8562         AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    8563     }
    8564     pVar->fRegAcquired = true;
    8565     return idxReg;
    8566 }
    8567 #endif
    8568 
    8569 
    8570 /**
    8571  * The value of variable @a idxVar will be written in full to the @a enmGstReg
    8572  * guest register.
    8573  *
    8574  * This function makes sure there is a register for it and sets it to be the
    8575  * current shadow copy of @a enmGstReg.
    8576  *
    8577  * @returns The host register number.
    8578  * @param   pReNative   The recompiler state.
    8579  * @param   idxVar      The variable.
    8580  * @param   enmGstReg   The guest register this variable will be written to
    8581  *                      after this call.
    8582  * @param   poff        Pointer to the instruction buffer offset.
    8583  *                      In case a register needs to be freed up or if the
    8584  *                      variable content needs to be loaded off the stack.
    8585  *
     8586  * @note    We DO NOT expect @a idxVar to be an argument variable, because
     8587  *          this function should only be used in the commit stage of an
     8588  *          instruction.
    8589  */
    8590 DECL_HIDDEN_THROW(uint8_t)
    8591 iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
    8592 {
    8593     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8594     PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    8595     Assert(!pVar->fRegAcquired);
    8596     AssertMsgStmt(   pVar->cbVar <= 8
    8597                   && (   pVar->enmKind == kIemNativeVarKind_Immediate
    8598                       || pVar->enmKind == kIemNativeVarKind_Stack),
    8599                   ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
    8600                    pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
    8601                   IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
    8602 
    8603     /*
    8604      * This shouldn't ever be used for arguments, unless it's in a weird else
    8605      * branch that doesn't do any calling and even then it's questionable.
    8606      *
    8607      * However, in case someone writes crazy wrong MC code and does register
    8608      * updates before making calls, just use the regular register allocator to
    8609      * ensure we get a register suitable for the intended argument number.
    8610      */
    8611     AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
    8612 
    8613     /*
    8614      * If there is already a register for the variable, we transfer/set the
    8615      * guest shadow copy assignment to it.
    8616      */
    8617     uint8_t idxReg = pVar->idxReg;
    8618     if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8619     {
    8620 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    8621         if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
    8622         {
    8623 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    8624             iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
    8625             iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
    8626 # endif
    8627             pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
    8628         }
    8629 #endif
    8630 
    8631         if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    8632         {
    8633             uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
    8634             iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
    8635             Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
    8636                    g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
    8637         }
    8638         else
    8639         {
    8640             iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
    8641             Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
    8642                    g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
    8643         }
    8644         /** @todo figure this one out. We need some way of making sure the register isn't
    8645          * modified after this point, just in case we start writing crappy MC code. */
    8646         pVar->enmGstReg    = enmGstReg;
    8647         pVar->fRegAcquired = true;
    8648         return idxReg;
    8649     }
    8650     Assert(pVar->uArgNo == UINT8_MAX);
    8651 
    8652     /*
     8653      * Because this is supposed to be the commit stage, we just tag along with
     8654      * the temporary register allocator and upgrade it to a variable register.
    8655      */
    8656     idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
    8657     Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
    8658     Assert(pReNative->Core.aHstRegs[idxReg].idxVar  == UINT8_MAX);
    8659     pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
    8660     pReNative->Core.aHstRegs[idxReg].idxVar  = idxVar;
    8661     pVar->idxReg                             = idxReg;
    8662 
    8663     /*
    8664      * Now we need to load the register value.
    8665      */
    8666     if (pVar->enmKind == kIemNativeVarKind_Immediate)
    8667         *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
    8668     else
    8669     {
    8670         uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    8671         int32_t const offDispBp    = iemNativeStackCalcBpDisp(idxStackSlot);
    8672         switch (pVar->cbVar)
    8673         {
    8674             case sizeof(uint64_t):
    8675                 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
    8676                 break;
    8677             case sizeof(uint32_t):
    8678                 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
    8679                 break;
    8680             case sizeof(uint16_t):
    8681                 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
    8682                 break;
    8683             case sizeof(uint8_t):
    8684                 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
    8685                 break;
    8686             default:
    8687                 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
    8688         }
    8689     }
    8690 
    8691     pVar->fRegAcquired = true;
    8692     return idxReg;
    8693 }
    8694 
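/*
 * Illustrative sketch (not part of the original source): a commit-stage
 * fragment using the function above.  The store emitter name and the chosen
 * guest register are assumptions made for the example only.
 */
#if 0 /* example only; emitter name assumed */
    /* Make the variable's host register the shadow of the guest register ... */
    uint8_t const idxReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVar, kIemNativeGstReg_GprFirst, &off);
    /* ... and emit the store that commits the value to CPUMCTX (name assumed): */
    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rax));
    iemNativeVarRegisterRelease(pReNative, idxVar);
#endif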
    8695 
    8696 /**
    8697  * Emit code to save volatile registers prior to a call to a helper (TLB miss).
    8698  *
    8699  * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
    8700  * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
    8701  * requirement of flushing anything in volatile host registers when making a
    8702  * call.
    8703  *
    8704  * @returns New @a off value.
    8705  * @param   pReNative           The recompiler state.
    8706  * @param   off                 The code buffer position.
    8707  * @param   fHstRegsNotToSave   Set of registers not to save & restore.
    8708  */
    8709 DECL_HIDDEN_THROW(uint32_t)
    8710 iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
    8711 {
    8712     uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
    8713     if (fHstRegs)
    8714     {
    8715         do
    8716         {
    8717             unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    8718             fHstRegs &= ~RT_BIT_32(idxHstReg);
    8719 
    8720             if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
    8721             {
    8722                 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
    8723                 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8724                 AssertStmt(   IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
    8725                            && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
    8726                            && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
    8727                            IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_12));
    8728                 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
    8729                 {
    8730                     case kIemNativeVarKind_Stack:
    8731                     {
    8732                         /* Temporarily spill the variable register. */
    8733                         uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    8734                         Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
    8735                                idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    8736                         off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
    8737                         continue;
    8738                     }
    8739 
    8740                     case kIemNativeVarKind_Immediate:
    8741                     case kIemNativeVarKind_VarRef:
    8742                     case kIemNativeVarKind_GstRegRef:
    8743                         /* It is weird to have any of these loaded at this point. */
    8744                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_13));
    8745                         continue;
    8746 
    8747                     case kIemNativeVarKind_End:
    8748                     case kIemNativeVarKind_Invalid:
    8749                         break;
    8750                 }
    8751                 AssertFailed();
    8752             }
    8753             else
    8754             {
    8755                 /*
    8756                  * Allocate a temporary stack slot and spill the register to it.
    8757                  */
    8758                 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
    8759                 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
    8760                            IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    8761                 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
    8762                 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
    8763                 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
    8764                        idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    8765                 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
    8766             }
    8767         } while (fHstRegs);
    8768     }
    8769 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    8770 
    8771     /*
     8772      * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot
     8773      * allocated, which would be more difficult anyway due to them spanning multiple stack slots
     8774      * and having different sizes (besides, we only have a limited number of slots at the moment).
     8775      *
     8776      * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted
     8777      * by the callee.  This asserts that the registers were written back earlier and are not dirty.
    8778      */
    8779     iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
    8780 
    8781     fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
    8782     if (fHstRegs)
    8783     {
    8784         do
    8785         {
    8786             unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    8787             fHstRegs &= ~RT_BIT_32(idxHstReg);
    8788 
    8789             /* Fixed reserved and temporary registers don't need saving. */
    8790             if (   pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
    8791                 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
    8792                 continue;
    8793 
    8794             Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
    8795 
    8796             uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
    8797             IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8798             AssertStmt(   IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
    8799                        && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
    8800                        && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
    8801                        && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
    8802                        && (   pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
    8803                            || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
    8804                        IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_12));
    8805             switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
    8806             {
    8807                 case kIemNativeVarKind_Stack:
    8808                 {
    8809                     /* Temporarily spill the variable register. */
    8810                     uint8_t const cbVar        = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
    8811                     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    8812                     Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
    8813                            idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    8814                     if (cbVar == sizeof(RTUINT128U))
    8815                         off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
    8816                     else
    8817                         off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
    8818                     continue;
    8819                 }
    8820 
    8821                 case kIemNativeVarKind_Immediate:
    8822                 case kIemNativeVarKind_VarRef:
    8823                 case kIemNativeVarKind_GstRegRef:
    8824                     /* It is weird to have any of these loaded at this point. */
    8825                     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_13));
    8826                     continue;
    8827 
    8828                 case kIemNativeVarKind_End:
    8829                 case kIemNativeVarKind_Invalid:
    8830                     break;
    8831             }
    8832             AssertFailed();
    8833         } while (fHstRegs);
    8834     }
    8835 #endif
    8836     return off;
    8837 }
    8838 
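/*
 * Illustrative sketch (not part of the original source): how the save helper
 * above and its restore counterpart below bracket a helper call; both
 * functions and the fHstRegsNotToSave parameter are as declared in this file.
 */
#if 0 /* example only */
    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    /* ... emit the actual helper call here (e.g. a TLB miss routine) ... */
    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
#endif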
    8839 
    8840 /**
     8841  * Emit code to restore volatile registers after a call to a helper.
    8842  *
    8843  * @returns New @a off value.
    8844  * @param   pReNative           The recompiler state.
    8845  * @param   off                 The code buffer position.
    8846  * @param   fHstRegsNotToSave   Set of registers not to save & restore.
    8847  * @see     iemNativeVarSaveVolatileRegsPreHlpCall(),
    8848  *          iemNativeRegRestoreGuestShadowsInVolatileRegs()
    8849  */
    8850 DECL_HIDDEN_THROW(uint32_t)
    8851 iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
    8852 {
    8853     uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
    8854     if (fHstRegs)
    8855     {
    8856         do
    8857         {
    8858             unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    8859             fHstRegs &= ~RT_BIT_32(idxHstReg);
    8860 
    8861             if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
    8862             {
    8863                 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
    8864                 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8865                 AssertStmt(   IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
    8866                            && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
    8867                            && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
    8868                            IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_12));
    8869                 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
    8870                 {
    8871                     case kIemNativeVarKind_Stack:
    8872                     {
    8873                         /* Unspill the variable register. */
    8874                         uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    8875                         Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
    8876                                idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    8877                         off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
    8878                         continue;
    8879                     }
    8880 
    8881                     case kIemNativeVarKind_Immediate:
    8882                     case kIemNativeVarKind_VarRef:
    8883                     case kIemNativeVarKind_GstRegRef:
    8884                         /* It is weird to have any of these loaded at this point. */
    8885                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_13));
    8886                         continue;
    8887 
    8888                     case kIemNativeVarKind_End:
    8889                     case kIemNativeVarKind_Invalid:
    8890                         break;
    8891                 }
    8892                 AssertFailed();
    8893             }
    8894             else
    8895             {
    8896                 /*
    8897                  * Restore from temporary stack slot.
    8898                  */
    8899                 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
    8900                 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
    8901                 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
    8902                 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
    8903 
    8904                 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
    8905             }
    8906         } while (fHstRegs);
    8907     }
    8908 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    8909     fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
    8910     if (fHstRegs)
    8911     {
    8912         do
    8913         {
    8914             unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    8915             fHstRegs &= ~RT_BIT_32(idxHstReg);
    8916 
    8917             if (   pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
    8918                 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
    8919                 continue;
    8920             Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
    8921 
    8922             uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
    8923             IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    8924             AssertStmt(   IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
    8925                        && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
    8926                        && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
    8927                        && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
    8928                        && (   pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
    8929                            || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
    8930                        IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_12));
    8931             switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
    8932             {
    8933                 case kIemNativeVarKind_Stack:
    8934                 {
    8935                     /* Unspill the variable register. */
    8936                     uint8_t const cbVar        = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
    8937                     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    8938                     Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
    8939                            idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    8940 
    8941                     if (cbVar == sizeof(RTUINT128U))
    8942                         off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
    8943                     else
    8944                         off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
    8945                     continue;
    8946                 }
    8947 
    8948                 case kIemNativeVarKind_Immediate:
    8949                 case kIemNativeVarKind_VarRef:
    8950                 case kIemNativeVarKind_GstRegRef:
    8951                     /* It is weird to have any of these loaded at this point. */
    8952                     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_13));
    8953                     continue;
    8954 
    8955                 case kIemNativeVarKind_End:
    8956                 case kIemNativeVarKind_Invalid:
    8957                     break;
    8958             }
    8959             AssertFailed();
    8960         } while (fHstRegs);
    8961     }
    8962 #endif
    8963     return off;
    8964 }
    8965 
    8966 
    8967 /**
     8968  * Worker that frees the stack slots for variable @a idxVar if any are allocated.
    8969  *
    8970  * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
    8971  *
    8972  * ASSUMES that @a idxVar is valid and unpacked.
    8973  */
    8974 DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    8975 {
    8976     Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
    8977     uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
    8978     if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
    8979     {
    8980         uint8_t const  cbVar      = pReNative->Core.aVars[idxVar].cbVar;
    8981         uint8_t const  cSlots     = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
    8982         uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
    8983         Assert(cSlots > 0);
    8984         Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
    8985         Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
    8986                idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
    8987         pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
    8988         pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
    8989     }
    8990     else
    8991         Assert(idxStackSlot == UINT8_MAX);
    8992 }
    8993 
    8994 
    8995 /**
    8996  * Worker that frees a single variable.
    8997  *
    8998  * ASSUMES that @a idxVar is valid and unpacked.
    8999  */
    9000 DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    9001 {
    9002     Assert(   pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid  /* Including invalid as we may have unused */
    9003            && pReNative->Core.aVars[idxVar].enmKind <  kIemNativeVarKind_End);    /* variables in conditional branches. */
    9004     Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
    9005 
    9006     /* Free the host register first if any assigned. */
    9007     uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    9008 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    9009     if (   idxHstReg != UINT8_MAX
    9010         && pReNative->Core.aVars[idxVar].fSimdReg)
    9011     {
    9012         Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
    9013         Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
    9014         pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
    9015         pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
    9016     }
    9017     else
    9018 #endif
    9019     if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    9020     {
    9021         Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
    9022         pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
    9023         pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    9024     }
    9025 
    9026     /* Free argument mapping. */
    9027     uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
    9028     if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
    9029         pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
    9030 
    9031     /* Free the stack slots. */
    9032     iemNativeVarFreeStackSlots(pReNative, idxVar);
    9033 
    9034     /* Free the actual variable. */
    9035     pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
    9036     pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
    9037 }
    9038 
    9039 
    9040 /**
    9041  * Worker for iemNativeVarFreeAll that's called when there is anything to do.
    9042  */
    9043 DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
    9044 {
    9045     while (bmVars != 0)
    9046     {
    9047         uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
    9048         bmVars &= ~RT_BIT_32(idxVar);
    9049 
    9050 #if 1 /** @todo optimize by simplifying this later... */
    9051         iemNativeVarFreeOneWorker(pReNative, idxVar);
    9052 #else
    9053         /* Only need to free the host register, the rest is done as bulk updates below. */
    9054         uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    9055         if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    9056         {
    9057             Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
    9058             pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
    9059             pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    9060         }
    9061 #endif
    9062     }
    9063 #if 0 /** @todo optimize by simplifying this later... */
    9064     pReNative->Core.bmVars     = 0;
    9065     pReNative->Core.bmStack    = 0;
    9066     pReNative->Core.u64ArgVars = UINT64_MAX;
    9067 #endif
    9068 }
    9069 
    9070 
    9071 
    9072 /*********************************************************************************************************************************
    9073 *   Emitters for IEM_MC_CALL_CIMPL_XXX                                                                                           *
    9074 *********************************************************************************************************************************/
    9075 
    9076 /**
    9077  * Emits code to load a reference to the given guest register into @a idxGprDst.
     9078  */
    9079 DECL_HIDDEN_THROW(uint32_t)
    9080 iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
    9081                                IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
    9082 {
    9083 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
     9084     /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
    9085 #endif
    9086 
    9087     /*
    9088      * Get the offset relative to the CPUMCTX structure.
    9089      */
    9090     uint32_t offCpumCtx;
    9091     switch (enmClass)
    9092     {
    9093         case kIemNativeGstRegRef_Gpr:
    9094             Assert(idxRegInClass < 16);
    9095             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
    9096             break;
    9097 
     9098         case kIemNativeGstRegRef_GprHighByte:    /**< AH, CH, DH, BH */
    9099             Assert(idxRegInClass < 4);
    9100             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
    9101             break;
    9102 
    9103         case kIemNativeGstRegRef_EFlags:
    9104             Assert(idxRegInClass == 0);
    9105             offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
    9106             break;
    9107 
    9108         case kIemNativeGstRegRef_MxCsr:
    9109             Assert(idxRegInClass == 0);
    9110             offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
    9111             break;
    9112 
    9113         case kIemNativeGstRegRef_FpuReg:
    9114             Assert(idxRegInClass < 8);
    9115             AssertFailed(); /** @todo what kind of indexing? */
    9116             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
    9117             break;
    9118 
    9119         case kIemNativeGstRegRef_MReg:
    9120             Assert(idxRegInClass < 8);
    9121             AssertFailed(); /** @todo what kind of indexing? */
    9122             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
    9123             break;
    9124 
    9125         case kIemNativeGstRegRef_XReg:
    9126             Assert(idxRegInClass < 16);
    9127             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
    9128             break;
    9129 
    9130         case kIemNativeGstRegRef_X87: /* Not actually a register, but we would just duplicate code otherwise. */
    9131             Assert(idxRegInClass == 0);
    9132             offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
    9133             break;
    9134 
    9135         case kIemNativeGstRegRef_XState: /* Not actually a register, but we would just duplicate code otherwise. */
    9136             Assert(idxRegInClass == 0);
    9137             offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
    9138             break;
    9139 
    9140         default:
    9141             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
    9142     }
    9143 
    9144     /*
    9145      * Load the value into the destination register.
    9146      */
    9147 #ifdef RT_ARCH_AMD64
    9148     off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
    9149 
    9150 #elif defined(RT_ARCH_ARM64)
    9151     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    9152     Assert(offCpumCtx < 4096);
    9153     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
    9154 
    9155 #else
    9156 # error "Port me!"
    9157 #endif
    9158 
    9159     return off;
    9160 }
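/*
 * Illustrative sketch (not part of the source proper): what the emitted LEA
 * computes at runtime, expressed in plain C.  On AMD64 the address is formed
 * off the fixed pVCpu register (see iemNativeEmitLeaGprByVCpu above); on
 * ARM64 it is an ADD of offCpumCtx to IEMNATIVE_REG_FIXED_PCPUMCTX, which is
 * assumed here to hold &pVCpu->cpum.GstCtx:
 */
#if 0 /* illustrative only */
uintptr_t const uAddrAmd64 = (uintptr_t)pVCpu + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) + offCpumCtx;
uintptr_t const uAddrArm64 = (uintptr_t)&pVCpu->cpum.GstCtx + offCpumCtx; /* same result */
#endif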
    9161 
    9162 
    9163 /**
    9164  * Common code for CIMPL and AIMPL calls.
    9165  *
    9166  * These are calls that use argument variables and such.  They should not be
    9167  * confused with internal calls required to implement an MC operation,
    9168  * like a TLB load and similar.
    9169  *
    9170  * Upon return all that is left to do is to load any hidden arguments and
    9171  * perform the call. All argument variables are freed.
    9172  *
    9173  * @returns New code buffer offset; throws VBox status code on error.
    9174  * @param   pReNative       The native recompile state.
    9175  * @param   off             The code buffer offset.
    9176  * @param   cArgs           The total number of arguments (includes hidden
    9177  *                          count).
    9178  * @param   cHiddenArgs     The number of hidden arguments.  The hidden
    9179  *                          arguments must not have any variable declared for
    9180  *                          them, whereas all the regular arguments must
    9181  *                          (tstIEMCheckMc ensures this).
    9182  * @param   fFlushPendingWrites Whether to flush pending writes (default: true).
    9183  *                              Pending writes in call-volatile registers are still flushed even if false.
    9184  */
    9185 DECL_HIDDEN_THROW(uint32_t)
    9186 iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
    9187                         bool fFlushPendingWrites /*= true*/)
    9188 {
    9189 #ifdef VBOX_STRICT
    9190     /*
    9191      * Assert sanity.
    9192      */
    9193     Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
    9194     Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
    9195     for (unsigned i = 0; i < cHiddenArgs; i++)
    9196         Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
    9197     for (unsigned i = cHiddenArgs; i < cArgs; i++)
    9198     {
    9199         Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
    9200         Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
    9201     }
    9202     iemNativeRegAssertSanity(pReNative);
    9203 #endif
    9204 
    9205     /* We don't know what the called function makes use of, so flush any pending register writes. */
    9206     RT_NOREF(fFlushPendingWrites);
    9207 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    9208     if (fFlushPendingWrites)
    9209 #endif
    9210         off = iemNativeRegFlushPendingWrites(pReNative, off);
    9211 
    9212     /*
    9213      * Before we do anything else, go over variables that are referenced and
    9214      * make sure they are not in a register.
    9215      */
    9216     uint32_t bmVars = pReNative->Core.bmVars;
    9217     if (bmVars)
    9218     {
    9219         do
    9220         {
    9221             uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
    9222             bmVars &= ~RT_BIT_32(idxVar);
    9223 
    9224             if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
    9225             {
    9226                 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
    9227 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    9228                 if (   idxRegOld != UINT8_MAX
    9229                     && pReNative->Core.aVars[idxVar].fSimdReg)
    9230                 {
    9231                     Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
    9232                     Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
    9233 
    9234                     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
    9235                     Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
    9236                            idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
    9237                            idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    9238                     if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
    9239                         off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
    9240                     else
    9241                         off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
    9242 
    9243                     Assert(!(   (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
    9244                               & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
    9245 
    9246                     pReNative->Core.aVars[idxVar].idxReg       = UINT8_MAX;
    9247                     pReNative->Core.bmHstSimdRegs              &= ~RT_BIT_32(idxRegOld);
    9248                     pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
    9249                     pReNative->Core.bmGstSimdRegShadows        &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
    9250                     pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
    9251                 }
    9252                 else
    9253 #endif
    9254                 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
    9255                 {
    9256                     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
    9257                     Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
    9258                            idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
    9259                            idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    9260                     off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
    9261 
    9262                     pReNative->Core.aVars[idxVar].idxReg    = UINT8_MAX;
    9263                     pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxRegOld);
    9264                     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
    9265                     pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
    9266                     pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
    9267                 }
    9268             }
    9269         } while (bmVars != 0);
    9270 #if 0 //def VBOX_STRICT
    9271         iemNativeRegAssertSanity(pReNative);
    9272 #endif
    9273     }
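    /* Illustrative note (not part of the source proper): "spilling" above
       boils down to a single frame-pointer relative store of the host
       register into the variable's stack slot, e.g. mov [rbp+disp], reg on
       AMD64, with disp coming from iemNativeStackCalcBpDisp(idxStackSlot). */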
    9274 
    9275     uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
    9276 
    9277 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    9278     /*
    9279      * As the very first step, make sure the host registers that will be used for
    9280      * arguments don't shadow anything which needs writing back first.
    9281      */
    9282     for (uint32_t i = 0; i < cRegArgs; i++)
    9283     {
    9284         uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
    9285 
    9286         /* Writeback any dirty guest shadows before using this register. */
    9287         if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
    9288             off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
    9289         Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
    9290     }
    9291 #endif
    9292 
    9293     /*
    9294      * First, go over the host registers that will be used for arguments and make
    9295      * sure they either hold the desired argument or are free.
    9296      */
    9297     if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
    9298     {
    9299         for (uint32_t i = 0; i < cRegArgs; i++)
    9300         {
    9301             uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
    9302             if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
    9303             {
    9304                 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
    9305                 {
    9306                     uint8_t const       idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
    9307                     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    9308                     PIEMNATIVEVAR const pVar   = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
    9309                     Assert(pVar->idxReg == idxArgReg);
    9310                     uint8_t const       uArgNo = pVar->uArgNo;
    9311                     if (uArgNo == i)
    9312                     { /* perfect */ }
    9313                     /* The variable allocator logic should make sure this is impossible,
    9314                        except for when the return register is used as a parameter (ARM,
    9315                        but not x86). */
    9316 #if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
    9317                     else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
    9318                     {
    9319 # ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    9320 #  error "Implement this"
    9321 # endif
    9322                         Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
    9323                         uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
    9324                         AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
    9325                                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
    9326                         off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
    9327                     }
    9328 #endif
    9329                     else
    9330                     {
    9331                         AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
    9332 
    9333                         if (pVar->enmKind == kIemNativeVarKind_Stack)
    9334                             off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
    9335                         else
    9336                         {
    9337                             /* just free it, can be reloaded if used again */
    9338                             pVar->idxReg               = UINT8_MAX;
    9339                             pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
    9340                             iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
    9341                         }
    9342                     }
    9343                 }
    9344                 else
    9345                     AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
    9346                                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
    9347             }
    9348         }
    9349 #if 0 //def VBOX_STRICT
    9350         iemNativeRegAssertSanity(pReNative);
    9351 #endif
    9352     }
    9353 
    9354     Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
    9355 
    9356 #ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    9357     /*
    9358      * If there are any stack arguments, make sure they are in their place as well.
    9359      *
    9360      * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
    9361      * the caller) will be loading it later and it must be free (see the first loop).
    9362      */
    9363     if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
    9364     {
    9365         for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
    9366         {
    9367             PIEMNATIVEVAR const pVar      = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
    9368             int32_t const       offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
    9369             if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    9370             {
    9371                 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
    9372                 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
    9373                 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
    9374                 pVar->idxReg = UINT8_MAX;
    9375             }
    9376             else
    9377             {
    9378                 /* Use ARG0 as temp for stuff we need registers for. */
    9379                 switch (pVar->enmKind)
    9380                 {
    9381                     case kIemNativeVarKind_Stack:
    9382                     {
    9383                         uint8_t const idxStackSlot = pVar->idxStackSlot;
    9384                         AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    9385                         off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
    9386                                                        iemNativeStackCalcBpDisp(idxStackSlot));
    9387                         off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
    9388                         continue;
    9389                     }
    9390 
    9391                     case kIemNativeVarKind_Immediate:
    9392                         off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
    9393                         continue;
    9394 
    9395                     case kIemNativeVarKind_VarRef:
    9396                     {
    9397                         uint8_t const idxOtherVar    = pVar->u.idxRefVar; /* unpacked */
    9398                         Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
    9399                         uint8_t const idxStackSlot   = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
    9400                         int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
    9401                         uint8_t const idxRegOther    = pReNative->Core.aVars[idxOtherVar].idxReg;
    9402 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    9403                         bool    const fSimdReg       = pReNative->Core.aVars[idxOtherVar].fSimdReg;
    9404                         uint8_t const cbVar          = pReNative->Core.aVars[idxOtherVar].cbVar;
    9405                         if (   fSimdReg
    9406                             && idxRegOther != UINT8_MAX)
    9407                         {
    9408                             Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
    9409                             if (cbVar == sizeof(RTUINT128U))
    9410                                 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
    9411                             else
    9412                                 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
    9413                             iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
    9414                             Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
    9415                         }
    9416                         else
    9417 # endif
    9418                         if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
    9419                         {
    9420                             off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
    9421                             iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
    9422                             Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
    9423                         }
    9424                         Assert(   pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
    9425                                && pReNative->Core.aVars[idxOtherVar].idxReg       == UINT8_MAX);
    9426                         off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
    9427                         off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
    9428                         continue;
    9429                     }
    9430 
    9431                     case kIemNativeVarKind_GstRegRef:
    9432                         off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
    9433                                                              pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
    9434                         off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
    9435                         continue;
    9436 
    9437                     case kIemNativeVarKind_Invalid:
    9438                     case kIemNativeVarKind_End:
    9439                         break;
    9440                 }
    9441                 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
    9442             }
    9443         }
    9444 # if 0 //def VBOX_STRICT
    9445         iemNativeRegAssertSanity(pReNative);
    9446 # endif
    9447     }
    9448 #else
    9449     AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
    9450 #endif
    9451 
    9452     /*
    9453      * Make sure the argument variables are loaded into their respective registers.
    9454      *
    9455      * We can optimize this by ASSUMING that any register allocations are for
    9456      * registers that have already been loaded and are ready.  The previous step
    9457      * saw to that.
    9458      */
    9459     if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
    9460     {
    9461         for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
    9462         {
    9463             uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
    9464             if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
    9465                 Assert(   pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
    9466                        && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
    9467                        && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
    9468             else
    9469             {
    9470                 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
    9471                 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    9472                 {
    9473                     Assert(pVar->enmKind == kIemNativeVarKind_Stack);
    9474                     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
    9475                     pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
    9476                                               | RT_BIT_32(idxArgReg);
    9477                     pVar->idxReg = idxArgReg;
    9478                 }
    9479                 else
    9480                 {
    9481                     /* Use ARG0 as temp for stuff we need registers for. */
    9482                     switch (pVar->enmKind)
    9483                     {
    9484                         case kIemNativeVarKind_Stack:
    9485                         {
    9486                             uint8_t const idxStackSlot = pVar->idxStackSlot;
    9487                             AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    9488                             off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
    9489                             continue;
    9490                         }
    9491 
    9492                         case kIemNativeVarKind_Immediate:
    9493                             off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
    9494                             continue;
    9495 
    9496                         case kIemNativeVarKind_VarRef:
    9497                         {
    9498                             uint8_t const idxOtherVar    = pVar->u.idxRefVar; /* unpacked */
    9499                             Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
    9500                             uint8_t const idxStackSlot   = iemNativeVarGetStackSlot(pReNative,
    9501                                                                                     IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
    9502                             int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
    9503                             uint8_t const idxRegOther    = pReNative->Core.aVars[idxOtherVar].idxReg;
    9504 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    9505                             bool    const fSimdReg       = pReNative->Core.aVars[idxOtherVar].fSimdReg;
    9506                             uint8_t const cbVar          = pReNative->Core.aVars[idxOtherVar].cbVar;
    9507                             if (   fSimdReg
    9508                                 && idxRegOther != UINT8_MAX)
    9509                             {
    9510                                 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
    9511                                 if (cbVar == sizeof(RTUINT128U))
    9512                                     off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
    9513                                 else
    9514                                     off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
    9515                                 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
    9516                                 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
    9517                             }
    9518                             else
    9519 #endif
    9520                             if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
    9521                             {
    9522                                 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
    9523                                 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
    9524                                 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
    9525                             }
    9526                             Assert(   pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
    9527                                    && pReNative->Core.aVars[idxOtherVar].idxReg       == UINT8_MAX);
    9528                             off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
    9529                             continue;
    9530                         }
    9531 
    9532                         case kIemNativeVarKind_GstRegRef:
    9533                             off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
    9534                                                                  pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
    9535                             continue;
    9536 
    9537                         case kIemNativeVarKind_Invalid:
    9538                         case kIemNativeVarKind_End:
    9539                             break;
    9540                     }
    9541                     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
    9542                 }
    9543             }
    9544         }
    9545 #if 0 //def VBOX_STRICT
    9546         iemNativeRegAssertSanity(pReNative);
    9547 #endif
    9548     }
    9549 #ifdef VBOX_STRICT
    9550     else
    9551         for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
    9552         {
    9553             Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
    9554             Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
    9555         }
    9556 #endif
    9557 
    9558     /*
    9559      * Free all argument variables (simplified).
    9560      * Their lifetime always expires with the call they are for.
    9561      */
    9562     /** @todo Make the python script check that arguments aren't used after
    9563      *        IEM_MC_CALL_XXXX. */
    9564     /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
    9565      *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
    9566      *        an argument value.  There is also some FPU stuff. */
    9567     for (uint32_t i = cHiddenArgs; i < cArgs; i++)
    9568     {
    9569         uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
    9570         Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
    9571 
    9572         /* no need to free registers: */
    9573         AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
    9574                   ?    pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
    9575                     || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
    9576                   : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
    9577                   ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
    9578                    i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
    9579 
    9580         pReNative->Core.aidxArgVars[i] = UINT8_MAX;
    9581         pReNative->Core.bmVars        &= ~RT_BIT_32(idxVar);
    9582         iemNativeVarFreeStackSlots(pReNative, idxVar);
    9583     }
    9584     Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
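    /* Illustrative note (not part of the source proper): the assertion above
       presumes u64ArgVars overlays the eight aidxArgVars bytes, so resetting
       every entry to UINT8_MAX makes the aggregate read as UINT64_MAX. */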
    9585 
    9586     /*
    9587      * Flush volatile registers as we make the call.
    9588      */
    9589     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
    9590 
    9591     return off;
    9592 }
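/*
 * Illustrative sketch (not part of the source proper): schematic of how a
 * call emitter is expected to drive iemNativeEmitCallCommon - it readies the
 * argument registers, after which the caller loads the hidden pVCpu argument
 * and performs the actual call.  The single hidden argument and the pfnCImpl
 * variable are assumptions of this sketch, not taken from the source:
 */
#if 0 /* illustrative only */
off = iemNativeEmitCallCommon(pReNative, off, cArgs, 1 /*cHiddenArgs - schematic*/);
off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl); /* pfnCImpl is a placeholder */
#endif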
    9593 
    9594 
    9595 
    9596 /*********************************************************************************************************************************
    9597 *   TLB Lookup.                                                                                                                  *
    9598 *********************************************************************************************************************************/
    9599 
    9600 /**
    9601  * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
    9602  */
    9603 DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
    9604 {
    9605     uint8_t const  iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
    9606     uint8_t const  cbMem   = RT_BYTE2(uSegAndSizeAndAccess);
    9607     uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
    9608     Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
    9609 
    9610     /* Do the lookup manually. */
    9611     RTGCPTR const      GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
    9612     uint64_t const     uTag      = IEMTLB_CALC_TAG(    &pVCpu->iem.s.DataTlb, GCPtrFlat);
    9613     PIEMTLBENTRY const pTlbe     = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
    9614     if (RT_LIKELY(pTlbe->uTag == uTag))
    9615     {
    9616         /*
    9617          * Check TLB page table level access flags.
    9618          */
    9619         AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
    9620         uint64_t const fNoUser          = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
    9621         uint64_t const fNoWriteNoDirty  = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
    9622                                         : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
    9623         uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & (  IEMTLBE_F_PHYS_REV       | IEMTLBE_F_NO_MAPPINGR3
    9624                                                                      | IEMTLBE_F_PG_UNASSIGNED
    9625                                                                      | IEMTLBE_F_PT_NO_ACCESSED
    9626                                                                      | fNoWriteNoDirty          | fNoUser);
    9627         uint64_t const uTlbPhysRev      = pVCpu->iem.s.DataTlb.uTlbPhysRev;
    9628         if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
    9629         {
    9630             /*
    9631              * Return the address.
    9632              */
    9633             uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
    9634             if ((uintptr_t)pbAddr == uResult)
    9635                 return;
    9636             RT_NOREF(cbMem);
    9637             AssertFailed();
    9638         }
    9639         else
    9640             AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
    9641                              fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
    9642     }
    9643     else
    9644         AssertFailed();
    9645     RT_BREAKPOINT();
    9646 }
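/*
 * Illustrative sketch (not part of the source proper): the uSegAndSizeAndAccess
 * layout implied by the unpacking at the top of the function - byte 0 holds
 * the segment register index, byte 1 the access size, and bits 16 and up the
 * access flags:
 */
#if 0 /* illustrative only */
uint32_t const uSegAndSizeAndAccess = (uint32_t)iSegReg
                                    | ((uint32_t)cbMem << 8)
                                    | (fAccess << 16);
#endif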
    9647 
    9648 /* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
    9649 
    9650 
    9651 
    9652 /*********************************************************************************************************************************
    9653 *   Recompiler Core.                                                                                                             *
    9654 *********************************************************************************************************************************/
    9655 
    9656 /** @callback_method_impl{FNDISREADBYTES, Dummy.} */
    9657 static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
    9658 {
    9659     RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
    9660     pDis->cbCachedInstr += cbMaxRead;
    9661     RT_NOREF(cbMinRead);
    9662     return VERR_NO_DATA;
    9663 }
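/*
 * Illustrative note (not part of the source proper): this callback only fires
 * when the disassembler wants bytes beyond the prefetched opcode buffer;
 * feeding zeros and VERR_NO_DATA makes DISInstrWithPrefetchedBytes fail
 * gracefully on truncated instructions instead of reading out of bounds.
 */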
    9664 
    9665 
    9666 DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
    9667 {
    9668     static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
    9669     {
    9670 #define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
    9671         ENTRY(fLocalForcedActions),
    9672         ENTRY(iem.s.rcPassUp),
    9673         ENTRY(iem.s.fExec),
    9674         ENTRY(iem.s.pbInstrBuf),
    9675         ENTRY(iem.s.uInstrBufPc),
    9676         ENTRY(iem.s.GCPhysInstrBuf),
    9677         ENTRY(iem.s.cbInstrBufTotal),
    9678         ENTRY(iem.s.idxTbCurInstr),
    9679 #ifdef VBOX_WITH_STATISTICS
    9680         ENTRY(iem.s.StatNativeTlbHitsForFetch),
    9681         ENTRY(iem.s.StatNativeTlbHitsForStore),
    9682         ENTRY(iem.s.StatNativeTlbHitsForStack),
    9683         ENTRY(iem.s.StatNativeTlbHitsForMapped),
    9684         ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
    9685         ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
    9686         ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
    9687         ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
    9688 #endif
    9689         ENTRY(iem.s.DataTlb.aEntries),
    9690         ENTRY(iem.s.DataTlb.uTlbRevision),
    9691         ENTRY(iem.s.DataTlb.uTlbPhysRev),
    9692         ENTRY(iem.s.DataTlb.cTlbHits),
    9693         ENTRY(iem.s.CodeTlb.aEntries),
    9694         ENTRY(iem.s.CodeTlb.uTlbRevision),
    9695         ENTRY(iem.s.CodeTlb.uTlbPhysRev),
    9696         ENTRY(iem.s.CodeTlb.cTlbHits),
    9697         ENTRY(pVMR3),
    9698         ENTRY(cpum.GstCtx.rax),
    9699         ENTRY(cpum.GstCtx.ah),
    9700         ENTRY(cpum.GstCtx.rcx),
    9701         ENTRY(cpum.GstCtx.ch),
    9702         ENTRY(cpum.GstCtx.rdx),
    9703         ENTRY(cpum.GstCtx.dh),
    9704         ENTRY(cpum.GstCtx.rbx),
    9705         ENTRY(cpum.GstCtx.bh),
    9706         ENTRY(cpum.GstCtx.rsp),
    9707         ENTRY(cpum.GstCtx.rbp),
    9708         ENTRY(cpum.GstCtx.rsi),
    9709         ENTRY(cpum.GstCtx.rdi),
    9710         ENTRY(cpum.GstCtx.r8),
    9711         ENTRY(cpum.GstCtx.r9),
    9712         ENTRY(cpum.GstCtx.r10),
    9713         ENTRY(cpum.GstCtx.r11),
    9714         ENTRY(cpum.GstCtx.r12),
    9715         ENTRY(cpum.GstCtx.r13),
    9716         ENTRY(cpum.GstCtx.r14),
    9717         ENTRY(cpum.GstCtx.r15),
    9718         ENTRY(cpum.GstCtx.es.Sel),
    9719         ENTRY(cpum.GstCtx.es.u64Base),
    9720         ENTRY(cpum.GstCtx.es.u32Limit),
    9721         ENTRY(cpum.GstCtx.es.Attr),
    9722         ENTRY(cpum.GstCtx.cs.Sel),
    9723         ENTRY(cpum.GstCtx.cs.u64Base),
    9724         ENTRY(cpum.GstCtx.cs.u32Limit),
    9725         ENTRY(cpum.GstCtx.cs.Attr),
    9726         ENTRY(cpum.GstCtx.ss.Sel),
    9727         ENTRY(cpum.GstCtx.ss.u64Base),
    9728         ENTRY(cpum.GstCtx.ss.u32Limit),
    9729         ENTRY(cpum.GstCtx.ss.Attr),
    9730         ENTRY(cpum.GstCtx.ds.Sel),
    9731         ENTRY(cpum.GstCtx.ds.u64Base),
    9732         ENTRY(cpum.GstCtx.ds.u32Limit),
    9733         ENTRY(cpum.GstCtx.ds.Attr),
    9734         ENTRY(cpum.GstCtx.fs.Sel),
    9735         ENTRY(cpum.GstCtx.fs.u64Base),
    9736         ENTRY(cpum.GstCtx.fs.u32Limit),
    9737         ENTRY(cpum.GstCtx.fs.Attr),
    9738         ENTRY(cpum.GstCtx.gs.Sel),
    9739         ENTRY(cpum.GstCtx.gs.u64Base),
    9740         ENTRY(cpum.GstCtx.gs.u32Limit),
    9741         ENTRY(cpum.GstCtx.gs.Attr),
    9742         ENTRY(cpum.GstCtx.rip),
    9743         ENTRY(cpum.GstCtx.eflags),
    9744         ENTRY(cpum.GstCtx.uRipInhibitInt),
    9745         ENTRY(cpum.GstCtx.cr0),
    9746         ENTRY(cpum.GstCtx.cr4),
    9747         ENTRY(cpum.GstCtx.aXcr[0]),
    9748         ENTRY(cpum.GstCtx.aXcr[1]),
    9749 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    9750         ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
    9751         ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
    9752         ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
    9753         ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
    9754         ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
    9755         ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
    9756         ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
    9757         ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
    9758         ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
    9759         ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
    9760         ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
    9761         ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
    9762         ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
    9763         ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
    9764         ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
    9765         ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
    9766         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
    9767         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
    9768         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
    9769         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
    9770         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
    9771         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
    9772         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
    9773         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
    9774         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
    9775         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
    9776         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
    9777         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
    9778         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
    9779         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
    9780         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
    9781         ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
    9782 #endif
    9783 #undef ENTRY
    9784     };
    9785 #ifdef VBOX_STRICT
    9786     static bool s_fOrderChecked = false;
    9787     if (!s_fOrderChecked)
    9788     {
    9789         s_fOrderChecked = true;
    9790         uint32_t offPrev = s_aMembers[0].off;
    9791         for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
    9792         {
    9793             Assert(s_aMembers[i].off > offPrev);
    9794             offPrev = s_aMembers[i].off;
    9795         }
    9796     }
    9797 #endif
    9798 
    9799     /*
    9800      * Binary lookup.
    9801      */
    9802     unsigned iStart = 0;
    9803     unsigned iEnd   = RT_ELEMENTS(s_aMembers);
    9804     for (;;)
    9805     {
    9806         unsigned const iCur   = iStart + (iEnd - iStart) / 2;
    9807         uint32_t const offCur = s_aMembers[iCur].off;
    9808         if (off < offCur)
    9809         {
    9810             if (iCur != iStart)
    9811                 iEnd = iCur;
    9812             else
    9813                 break;
    9814         }
    9815         else if (off > offCur)
    9816         {
    9817             if (iCur + 1 < iEnd)
    9818                 iStart = iCur + 1;
    9819             else
    9820                 break;
    9821         }
    9822         else
    9823             return s_aMembers[iCur].pszName;
    9824     }
    9825 #ifdef VBOX_WITH_STATISTICS
    9826     if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
    9827         return "iem.s.acThreadedFuncStats[iFn]";
    9828 #endif
    9829     return NULL;
    9830 }
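/*
 * Illustrative sketch (not part of the source proper): typical use of the
 * lookup when annotating VCPU-relative addressing in the disassembly:
 */
#if 0 /* illustrative only */
const char *pszMember = iemNativeDbgVCpuOffsetToName((uint32_t)RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip));
/* Exact table hits return the member name ("cpum.GstCtx.rip" here); unknown offsets yield NULL. */
#endif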
    9831 
    9832 
    9833 DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
    9834 {
    9835     AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
    9836 #if defined(RT_ARCH_AMD64)
    9837     static const char * const a_apszMarkers[] =
    9838     {
    9839         /*[0]=*/ "unknown0",        "CheckCsLim",           "ConsiderLimChecking",  "CheckOpcodes",
    9840         /*[4]=*/ "PcAfterBranch",   "LoadTlbForNewPage",    "LoadTlbAfterBranch"
    9841     };
    9842 #endif
    9843 
    9844     char                    szDisBuf[512];
    9845     DISSTATE                Dis;
    9846     PCIEMNATIVEINSTR const  paNative      = pTb->Native.paInstructions;
    9847     uint32_t const          cNative       = pTb->Native.cInstructions;
    9848     uint32_t                offNative     = 0;
    9849 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    9850     PCIEMTBDBG const        pDbgInfo      = pTb->pDbgInfo;
    9851 #endif
    9852     DISCPUMODE              enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
    9853                                           : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
    9854                                           :                                                            DISCPUMODE_64BIT;
    9855 #if   defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
    9856     DISCPUMODE const        enmHstCpuMode = DISCPUMODE_64BIT;
    9857 #elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
    9858     DISCPUMODE const        enmHstCpuMode = DISCPUMODE_ARMV8_A64;
    9859 #elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
    9860 # error "Port me"
    9861 #else
    9862     csh                     hDisasm       = ~(size_t)0;
    9863 # if defined(RT_ARCH_AMD64)
    9864     cs_err                  rcCs          = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
    9865 # elif defined(RT_ARCH_ARM64)
    9866     cs_err                  rcCs          = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
    9867 # else
    9868 #  error "Port me"
    9869 # endif
    9870     AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
    9871 
    9872     //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON);  - not needed as pInstr->detail doesn't provide full memory detail.
    9873     //Assert(rcCs == CS_ERR_OK);
    9874 #endif
    9875 
    9876     /*
    9877      * Print TB info.
    9878      */
    9879     pHlp->pfnPrintf(pHlp,
    9880                     "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
    9881                     "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
    9882                     pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
    9883                     pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
    9884 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    9885     if (pDbgInfo && pDbgInfo->cEntries > 1)
    9886     {
    9887         Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
    9888 
    9889         /*
    9890          * This disassembly is driven by the debug info which follows the native
    9891          * code and indicates where the next guest instruction starts, where
    9892          * labels are, and such things.
    9893          */
    9894         uint32_t                idxThreadedCall  = 0;
    9895         uint32_t                fExec            = pTb->fFlags & UINT32_C(0x00ffffff);
    9896         uint8_t                 idxRange         = UINT8_MAX;
    9897         uint8_t const           cRanges          = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
    9898         uint32_t                offRange         = 0;
    9899         uint32_t                offOpcodes       = 0;
    9900         uint32_t const          cbOpcodes        = pTb->cbOpcodes;
    9901         RTGCPHYS                GCPhysPc         = pTb->GCPhysPc;
    9902         uint32_t const          cDbgEntries      = pDbgInfo->cEntries;
    9903         uint32_t                iDbgEntry        = 1;
    9904         uint32_t                offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
    9905 
    9906         while (offNative < cNative)
    9907         {
    9908             /* If we're at or have passed the point where the next chunk of debug
    9909                info starts, process it. */
    9910             if (offDbgNativeNext <= offNative)
    9911             {
    9912                 offDbgNativeNext = UINT32_MAX;
    9913                 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
    9914                 {
    9915                     switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
    9916                     {
    9917                         case kIemTbDbgEntryType_GuestInstruction:
    9918                         {
    9919                             /* Did the exec flag change? */
    9920                             if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
    9921                             {
    9922                                 pHlp->pfnPrintf(pHlp,
    9923                                                 "  fExec change %#08x -> %#08x %s\n",
    9924                                                 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
    9925                                                 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
    9926                                                                    szDisBuf, sizeof(szDisBuf)));
    9927                                 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
    9928                                 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
    9929                                               : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
    9930                                               :                                                      DISCPUMODE_64BIT;
    9931                             }
    9932 
    9933                             /* New opcode range? We need to fend off a spurious debug info entry here for cases
    9934                                where the compilation was aborted before the opcode was recorded and the actual
    9935                                instruction was translated to a threaded call.  This may happen when we run out
    9936                                of ranges, or when some complicated interrupts/FFs are found to be pending or
    9937                                similar.  So, we just deal with it here rather than in the compiler code as it
    9938                                is a lot simpler to do here. */
    9939                             if (   idxRange == UINT8_MAX
    9940                                 || idxRange >= cRanges
    9941                                 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
    9942                             {
    9943                                 idxRange += 1;
    9944                                 if (idxRange < cRanges)
    9945                                     offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
    9946                                 else
    9947                                     continue;
    9948                                 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
    9949                                 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
    9950                                          + (pTb->aRanges[idxRange].idxPhysPage == 0
    9951                                             ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
    9952                                             : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
    9953                                 pHlp->pfnPrintf(pHlp, "  Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
    9954                                                 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
    9955                                                 pTb->aRanges[idxRange].idxPhysPage);
    9956                                 GCPhysPc += offRange;
    9957                             }
    9958 
    9959                             /* Disassemble the instruction. */
    9960                             //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
    9961                             uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
    9962                             uint32_t      cbInstr    = 1;
    9963                             int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
    9964                                                                  &pTb->pabOpcodes[offOpcodes], cbInstrMax,
    9965                                                                  iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
    9966                             if (RT_SUCCESS(rc))
    9967                             {
    9968                                 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    9969                                                              DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    9970                                                              | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    9971                                                              NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    9972 
    9973                                 static unsigned const s_offMarker  = 55;
    9974                                 static char const     s_szMarker[] = " ; <--- guest";
    9975                                 if (cch < s_offMarker)
    9976                                 {
    9977                                     memset(&szDisBuf[cch], ' ', s_offMarker - cch);
    9978                                     cch = s_offMarker;
    9979                                 }
    9980                                 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
    9981                                     memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
    9982 
    9983                                 pHlp->pfnPrintf(pHlp, "  %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
    9984                             }
    9985                             else
    9986                             {
    9987                                 pHlp->pfnPrintf(pHlp, "  %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
    9988                                                 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
    9989                                 cbInstr = 1;
    9990                             }
    9991                             GCPhysPc   += cbInstr;
    9992                             offOpcodes += cbInstr;
    9993                             offRange   += cbInstr;
    9994                             continue;
    9995                         }
    9996 
    9997                         case kIemTbDbgEntryType_ThreadedCall:
    9998                             pHlp->pfnPrintf(pHlp,
    9999                                             "  Call #%u to %s (%u args) - %s\n",
    10000                                             idxThreadedCall,
    10001                                             g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
    10002                                             g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
    10003                                             pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
    10004                             idxThreadedCall++;
    10005                             continue;
    10006 
    10007                         case kIemTbDbgEntryType_GuestRegShadowing:
    10008                         {
    10009                             PCIEMTBDBGENTRY const pEntry    = &pDbgInfo->aEntries[iDbgEntry];
    10010                             const char * const    pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
    10011                             if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
    10012                                 pHlp->pfnPrintf(pHlp, "  Guest register %s != host register %s\n", pszGstReg,
    10013                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
    10014                             else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
    10015                                 pHlp->pfnPrintf(pHlp, "  Guest register %s == host register %s \n", pszGstReg,
    10016                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
    10017                             else
    10018                                 pHlp->pfnPrintf(pHlp, "  Guest register %s == host register %s (previously in %s)\n", pszGstReg,
    10019                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
    10020                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
    10021                             continue;
    10022                         }
    10023 
    10024 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    10025                         case kIemTbDbgEntryType_GuestSimdRegShadowing:
    10026                         {
    10027                             PCIEMTBDBGENTRY const pEntry    = &pDbgInfo->aEntries[iDbgEntry];
    10028                             const char * const    pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
    10029                             if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
    10030                                 pHlp->pfnPrintf(pHlp, "  Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
    10031                                                 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
    10032                             else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
    10033                                 pHlp->pfnPrintf(pHlp, "  Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
    10034                                                 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
    10035                             else
    10036                                 pHlp->pfnPrintf(pHlp, "  Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
    10037                                                 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
    10038                                                 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
    10039                             continue;
    10040                         }
    10041 #endif
    10042 
    10043                         case kIemTbDbgEntryType_Label:
    10044                         {
    10045                             const char *pszName    = "what_the_fudge";
    10046                             const char *pszComment = "";
    10047                             bool        fNumbered  = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
    10048                             switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
    10049                             {
    10050                                 case kIemNativeLabelType_Return:                pszName = "Return"; break;
    10051                                 case kIemNativeLabelType_ReturnBreak:           pszName = "ReturnBreak"; break;
    10052                                 case kIemNativeLabelType_ReturnWithFlags:       pszName = "ReturnWithFlags"; break;
    10053                                 case kIemNativeLabelType_NonZeroRetOrPassUp:    pszName = "NonZeroRetOrPassUp"; break;
    10054                                 case kIemNativeLabelType_RaiseDe:               pszName = "RaiseDe"; break;
    10055                                 case kIemNativeLabelType_RaiseUd:               pszName = "RaiseUd"; break;
    10056                                 case kIemNativeLabelType_RaiseSseRelated:       pszName = "RaiseSseRelated"; break;
    10057                                 case kIemNativeLabelType_RaiseAvxRelated:       pszName = "RaiseAvxRelated"; break;
    10058                                 case kIemNativeLabelType_RaiseSseAvxFpRelated:  pszName = "RaiseSseAvxFpRelated"; break;
    10059                                 case kIemNativeLabelType_RaiseNm:               pszName = "RaiseNm"; break;
    10060                                 case kIemNativeLabelType_RaiseGp0:              pszName = "RaiseGp0"; break;
    10061                                 case kIemNativeLabelType_RaiseMf:               pszName = "RaiseMf"; break;
    10062                                 case kIemNativeLabelType_RaiseXf:               pszName = "RaiseXf"; break;
    10063                                 case kIemNativeLabelType_ObsoleteTb:            pszName = "ObsoleteTb"; break;
    10064                                 case kIemNativeLabelType_NeedCsLimChecking:     pszName = "NeedCsLimChecking"; break;
    10065                                 case kIemNativeLabelType_CheckBranchMiss:       pszName = "CheckBranchMiss"; break;
    10066                                 case kIemNativeLabelType_If:
    10067                                     pszName = "If";
    10068                                     fNumbered = true;
    10069                                     break;
    10070                                 case kIemNativeLabelType_Else:
    10071                                     pszName = "Else";
    10072                                     fNumbered = true;
    10073                                     pszComment = "   ; regs state restored pre-if-block";
    10074                                     break;
    10075                                 case kIemNativeLabelType_Endif:
    10076                                     pszName = "Endif";
    10077                                     fNumbered = true;
    10078                                     break;
    10079                                 case kIemNativeLabelType_CheckIrq:
    10080                                     pszName = "CheckIrq_CheckVM";
    10081                                     fNumbered = true;
    10082                                     break;
    10083                                 case kIemNativeLabelType_TlbLookup:
    10084                                     pszName = "TlbLookup";
    10085                                     fNumbered = true;
    10086                                     break;
    10087                                 case kIemNativeLabelType_TlbMiss:
    10088                                     pszName = "TlbMiss";
    10089                                     fNumbered = true;
    10090                                     break;
    10091                                 case kIemNativeLabelType_TlbDone:
    10092                                     pszName = "TlbDone";
    10093                                     fNumbered = true;
    10094                                     break;
    10095                                 case kIemNativeLabelType_Invalid:
    10096                                 case kIemNativeLabelType_End:
    10097                                     break;
    10098                             }
    10099                             if (fNumbered)
    10100                                 pHlp->pfnPrintf(pHlp, "  %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
    10101                             else
    10102                                 pHlp->pfnPrintf(pHlp, "  %s:\n", pszName);
    10103                             continue;
    10104                         }
    10105 
    10106                         case kIemTbDbgEntryType_NativeOffset:
    10107                             offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
    10108                             Assert(offDbgNativeNext >= offNative);
    10109                             break;
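                            /* (Unlike the cases above, this one breaks out of the
                               debug-entry loop: offDbgNativeNext marks where the next
                               annotation belongs, so the code below keeps disassembling
                               native instructions until that offset is reached.) */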
    10110 
    10111 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
    10112                         case kIemTbDbgEntryType_DelayedPcUpdate:
    10113                             pHlp->pfnPrintf(pHlp, "  Updating guest PC value by %u (cInstrSkipped=%u)\n",
    10114                                             pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
    10115                                             pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
    10116                             continue;
    10117 #endif
    10118 
    10119 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    10120                         case kIemTbDbgEntryType_GuestRegDirty:
    10121                         {
    10122                             PCIEMTBDBGENTRY const pEntry    = &pDbgInfo->aEntries[iDbgEntry];
    10123                             const char * const    pszGstReg = pEntry->GuestRegDirty.fSimdReg
    10124                                                             ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
    10125                                                             : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
    10126                             const char * const    pszHstReg = pEntry->GuestRegDirty.fSimdReg
    10127                                                             ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
    10128                                                             : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
    10129                             pHlp->pfnPrintf(pHlp, "  Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
    10130                                             pszGstReg, pszHstReg);
    10131                             continue;
    10132                         }
    10133 
    10134                         case kIemTbDbgEntryType_GuestRegWriteback:
    10135                             pHlp->pfnPrintf(pHlp, "  Writing dirty %s registers (gst %#RX32)\n",
    10136                                             pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
    10137                                             pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg);
    10138                             continue;
    10139 #endif
    10140 
    10141                         default:
    10142                             AssertFailed();
    10143                     }
    10144                     iDbgEntry++;
    10145                     break;
    10146                 }
    10147             }
    10148 
    10149             /*
    10150              * Disassemble the next native instruction.
    10151              */
    10152             PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
    10153 # ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    10154             uint32_t               cbInstr    = sizeof(paNative[0]);
    10155             int const              rc         = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
    10156             if (RT_SUCCESS(rc))
    10157             {
    10158 #  if defined(RT_ARCH_AMD64)
    10159                 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
    10160                 {
    10161                     uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
    10162                     if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
    10163                         pHlp->pfnPrintf(pHlp, "    %p: nop ; marker: call #%u to %s (%u args) - %s\n",
    10164                                         pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
    10165                                         g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
    10166                                         uInfo & 0x8000 ? "recompiled" : "todo");
    10167                     else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
    10168                         pHlp->pfnPrintf(pHlp, "    %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
    10169                     else
    10170                         pHlp->pfnPrintf(pHlp, "    %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
    10171                 }
    10172                 else
    10173 #  endif
    10174                 {
    10175                     const char *pszAnnotation = NULL;
    10176 #  ifdef RT_ARCH_AMD64
    10177                     DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    10178                                     DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    10179                                     | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    10180                                     NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    10181                     PCDISOPPARAM pMemOp;
    10182                     if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
    10183                         pMemOp = &Dis.Param1;
    10184                     else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
    10185                         pMemOp = &Dis.Param2;
    10186                     else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
    10187                         pMemOp = &Dis.Param3;
    10188                     else
    10189                         pMemOp = NULL;
    10190                     if (   pMemOp
    10191                         && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
    10192                         && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
    10193                         pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
    10194                                                                      ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
    10195 
    10196 #elif defined(RT_ARCH_ARM64)
    10197                     DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
    10198                                      DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    10199                                      NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    10200 #  else
    10201 #   error "Port me"
    10202 #  endif
    10203                     if (pszAnnotation)
    10204                     {
    10205                         static unsigned const s_offAnnotation = 55;
    10206                         size_t const          cchAnnotation   = strlen(pszAnnotation);
    10207                         size_t                cchDis          = strlen(szDisBuf);
    10208                         if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
    10209                         {
    10210                             if (cchDis < s_offAnnotation)
    10211                             {
    10212                                 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
    10213                                 cchDis = s_offAnnotation;
    10214                             }
    10215                             szDisBuf[cchDis++] = ' ';
    10216                             szDisBuf[cchDis++] = ';';
    10217                             szDisBuf[cchDis++] = ' ';
    10218                             memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
    10219                         }
    10220                     }
    10221                     pHlp->pfnPrintf(pHlp, "    %p: %s\n", pNativeCur, szDisBuf);
    10222                 }
    10223             }
    10224             else
    10225             {
    10226 #  if defined(RT_ARCH_AMD64)
    10227                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %Rrc\n",
    10228                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
    10229 #  elif defined(RT_ARCH_ARM64)
    10230                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
    10231 #  else
    10232 #   error "Port me"
    10233 #  endif
    10234                 cbInstr = sizeof(paNative[0]);
    10235             }
     10236             offNative += cbInstr / sizeof(paNative[0]);
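            /* (Note: IEMNATIVEINSTR is a byte-sized unit on AMD64 and a 32-bit word on
               ARM64 -- hence the two format strings above -- so this advances by cbInstr
               bytes or cbInstr / 4 words respectively.) */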
    10237 
    10238 #  else  /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    10239             cs_insn *pInstr;
    10240             size_t   cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
    10241                                          (uintptr_t)pNativeCur, 1, &pInstr);
    10242             if (cInstrs > 0)
    10243             {
    10244                 Assert(cInstrs == 1);
    10245                 const char *pszAnnotation = NULL;
    10246 #  if defined(RT_ARCH_ARM64)
    10247                 if (   (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
    10248                     || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
    10249                 {
     10250                     /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
    10251                     AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
    10252                     char *psz = strchr(pInstr->op_str, '[');
    10253                     if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
    10254                     {
     10255                         uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
    10256                         int32_t        off     = -1;
    10257                         psz += 4;
    10258                         if (*psz == ']')
    10259                             off = 0;
    10260                         else if (*psz == ',')
    10261                         {
    10262                             psz = RTStrStripL(psz + 1);
    10263                             if (*psz == '#')
    10264                                 off = RTStrToInt32(&psz[1]);
    10265                             /** @todo deal with index registers and LSL as well... */
    10266                         }
    10267                         if (off >= 0)
    10268                             pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
    10269                     }
    10270                 }
    10271 #  endif
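                /* Worked example of the above (hypothetical values): for an op_str of
                   "w9, [x28, #1704]" we get offVCpu = 0 (x28 is pVCpu) and off = 1704,
                   and iemNativeDbgVCpuOffsetToName(1704) supplies the field name to
                   show; for "[x27, #16]" the cpum.GstCtx offset is added first, since
                   x27 points into the guest context. */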
    10272 
    10273                 size_t const cchOp = strlen(pInstr->op_str);
    10274 #  if defined(RT_ARCH_AMD64)
    10275                 if (pszAnnotation)
    10276                     pHlp->pfnPrintf(pHlp, "    %p: %.*Rhxs %-7s %s%*s ; %s\n",
    10277                                     pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
    10278                                     cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
    10279                 else
    10280                     pHlp->pfnPrintf(pHlp, "    %p: %.*Rhxs %-7s %s\n",
    10281                                     pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
    10282 
    10283 #  else
    10284                 if (pszAnnotation)
    10285                     pHlp->pfnPrintf(pHlp, "    %p: %#010RX32 %-7s %s%*s ; %s\n",
    10286                                     pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
    10287                                     cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
    10288                 else
    10289                     pHlp->pfnPrintf(pHlp, "    %p: %#010RX32 %-7s %s\n",
    10290                                     pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
    10291 #  endif
    10292                 offNative += pInstr->size / sizeof(*pNativeCur);
    10293                 cs_free(pInstr, cInstrs);
    10294             }
    10295             else
    10296             {
    10297 #  if defined(RT_ARCH_AMD64)
    10298                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %d\n",
     10299                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
    10300 #  else
    10301                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
    10302 #  endif
    10303                 offNative++;
    10304             }
    10305 # endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    10306         }
    10307     }
    10308     else
    10309 #endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
    10310     {
    10311         /*
    10312          * No debug info, just disassemble the x86 code and then the native code.
    10313          *
    10314          * First the guest code:
    10315          */
    10316         for (unsigned i = 0; i < pTb->cRanges; i++)
    10317         {
    10318             RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
    10319                               + (pTb->aRanges[i].idxPhysPage == 0
    10320                                  ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
    10321                                  : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
    10322             pHlp->pfnPrintf(pHlp, "  Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
    10323                             i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
    10324             unsigned       off       = pTb->aRanges[i].offOpcodes;
    10325             /** @todo this ain't working when crossing pages!   */
    10326             unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
    10327             while (off < cbOpcodes)
    10328             {
    10329                 uint32_t cbInstr = 1;
    10330                 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
    10331                                                      &pTb->pabOpcodes[off], cbOpcodes - off,
    10332                                                      iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
    10333                 if (RT_SUCCESS(rc))
    10334                 {
    10335                     DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    10336                                     DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    10337                                     | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    10338                                     NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    10339                     pHlp->pfnPrintf(pHlp, "    %RGp: %s\n", GCPhysPc, szDisBuf);
    10340                     GCPhysPc += cbInstr;
    10341                     off      += cbInstr;
    10342                 }
    10343                 else
    10344                 {
    10345                     pHlp->pfnPrintf(pHlp, "    %RGp: %.*Rhxs - disassembly failure %Rrc\n",
    10346                                     GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
    10347                     break;
    10348                 }
    10349             }
    10350         }
    10351 
    10352         /*
    10353          * Then the native code:
    10354          */
    10355         pHlp->pfnPrintf(pHlp, "  Native code %p L %#x\n", paNative, cNative);
    10356         while (offNative < cNative)
    10357         {
    10358             PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
    10359 # ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    10360             uint32_t               cbInstr    = sizeof(paNative[0]);
    10361             int const              rc         = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
    10362             if (RT_SUCCESS(rc))
    10363             {
    10364 #  if defined(RT_ARCH_AMD64)
    10365                 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
    10366                 {
    10367                     uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
    10368                     if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
    10369                         pHlp->pfnPrintf(pHlp, "\n    %p: nop ; marker: call #%u to %s (%u args) - %s\n",
    10370                                         pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
    10371                                         g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
    10372                                         uInfo & 0x8000 ? "recompiled" : "todo");
    10373                     else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
    10374                         pHlp->pfnPrintf(pHlp, "    %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
    10375                     else
    10376                         pHlp->pfnPrintf(pHlp, "    %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
    10377                 }
    10378                 else
    10379 #  endif
    10380                 {
    10381 #  ifdef RT_ARCH_AMD64
    10382                     DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    10383                                     DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    10384                                     | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    10385                                     NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    10386 #  elif defined(RT_ARCH_ARM64)
    10387                     DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
    10388                                      DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    10389                                      NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    10390 #  else
    10391 #   error "Port me"
    10392 #  endif
    10393                     pHlp->pfnPrintf(pHlp, "    %p: %s\n", pNativeCur, szDisBuf);
    10394                 }
    10395             }
    10396             else
    10397             {
    10398 #  if defined(RT_ARCH_AMD64)
    10399                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %Rrc\n",
    10400                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
    10401 #  else
    10402                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
    10403 #  endif
    10404                 cbInstr = sizeof(paNative[0]);
    10405             }
    10406             offNative += cbInstr / sizeof(paNative[0]);
    10407 
    10408 # else  /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    10409             cs_insn *pInstr;
    10410             size_t   cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
    10411                                          (uintptr_t)pNativeCur, 1, &pInstr);
    10412             if (cInstrs > 0)
    10413             {
    10414                 Assert(cInstrs == 1);
    10415 #  if defined(RT_ARCH_AMD64)
    10416                 pHlp->pfnPrintf(pHlp, "    %p: %.*Rhxs %-7s %s\n",
    10417                                 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
    10418 #  else
    10419                 pHlp->pfnPrintf(pHlp, "    %p: %#010RX32 %-7s %s\n",
    10420                                 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
    10421 #  endif
    10422                 offNative += pInstr->size / sizeof(*pNativeCur);
    10423                 cs_free(pInstr, cInstrs);
    10424             }
    10425             else
    10426             {
    10427 #  if defined(RT_ARCH_AMD64)
    10428                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %d\n",
     10429                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
    10430 #  else
    10431                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
    10432 #  endif
    10433                 offNative++;
    10434             }
    10435 # endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    10436         }
    10437     }
    10438 
    10439 #ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    10440     /* Cleanup. */
    10441     cs_close(&hDisasm);
    10442 #endif
    10443 }
    10444 
    10445 
    10446 /**
    10447  * Recompiles the given threaded TB into a native one.
    10448  *
    10449  * In case of failure the translation block will be returned as-is.
    10450  *
    10451  * @returns pTb.
    10452  * @param   pVCpu   The cross context virtual CPU structure of the calling
    10453  *                  thread.
    10454  * @param   pTb     The threaded translation to recompile to native.
    10455  */
    10456 DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
    10457 {
    10458     STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
    10459 
    10460     /*
    10461      * The first time thru, we allocate the recompiler state, the other times
    10462      * we just need to reset it before using it again.
    10463      */
    10464     PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    10465     if (RT_LIKELY(pReNative))
    10466         iemNativeReInit(pReNative, pTb);
    10467     else
    10468     {
    10469         pReNative = iemNativeInit(pVCpu, pTb);
    10470         AssertReturn(pReNative, pTb);
    10471     }
    10472 
    10473 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    10474     /*
    10475      * First do liveness analysis.  This is done backwards.
    10476      */
    10477     {
    10478         uint32_t idxCall = pTb->Thrd.cCalls;
    10479         if (idxCall <= pReNative->cLivenessEntriesAlloc)
    10480         { /* likely */ }
    10481         else
    10482         {
    10483             uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
    10484             while (idxCall > cAlloc)
    10485                 cAlloc *= 2;
    10486             void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
    10487             AssertReturn(pvNew, pTb);
    10488             pReNative->paLivenessEntries     = (PIEMLIVENESSENTRY)pvNew;
    10489             pReNative->cLivenessEntriesAlloc = cAlloc;
    10490         }
    10491         AssertReturn(idxCall > 0, pTb);
    10492         PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
    10493 
    10494         /* The initial (final) entry. */
    10495         idxCall--;
    10496         IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
    10497 
    10498         /* Loop backwards thru the calls and fill in the other entries. */
    10499         PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
    10500         while (idxCall > 0)
    10501         {
    10502             PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
    10503             if (pfnLiveness)
    10504                 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
    10505             else
    10506                 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
    10507             pCallEntry--;
    10508             idxCall--;
    10509         }
    10510 
    10511 # ifdef VBOX_WITH_STATISTICS
     10512         /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
     10513            to 'clobbered' rather than 'input'.  */
    10514         /** @todo */
    10515 # endif
    10516     }
    10517 #endif
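    /* The backward pass above, sketched for a three-call TB (illustrative only):
           IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[2]);
           pfnLiveness(&paCalls[2], &paLivenessEntries[2], &paLivenessEntries[1]);
           pfnLiveness(&paCalls[1], &paLivenessEntries[1], &paLivenessEntries[0]);
       i.e. each earlier entry is derived from its successor by folding in what
       the call between them reads and clobbers. */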
    10518 
    10519     /*
    10520      * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
    10521      * for aborting if an error happens.
    10522      */
    10523     uint32_t        cCallsLeft = pTb->Thrd.cCalls;
    10524 #ifdef LOG_ENABLED
    10525     uint32_t const  cCallsOrg  = cCallsLeft;
    10526 #endif
    10527     uint32_t        off        = 0;
    10528     int             rc         = VINF_SUCCESS;
    10529     IEMNATIVE_TRY_SETJMP(pReNative, rc)
    10530     {
    10531         /*
    10532          * Emit prolog code (fixed).
    10533          */
    10534         off = iemNativeEmitProlog(pReNative, off);
    10535 
    10536         /*
    10537          * Convert the calls to native code.
    10538          */
    10539 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    10540         int32_t              iGstInstr        = -1;
    10541 #endif
    10542 #ifndef VBOX_WITHOUT_RELEASE_STATISTICS
    10543         uint32_t             cThreadedCalls   = 0;
    10544         uint32_t             cRecompiledCalls = 0;
    10545 #endif
    10546 #if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
    10547         uint32_t             idxCurCall       = 0;
    10548 #endif
    10549         PCIEMTHRDEDCALLENTRY pCallEntry       = pTb->Thrd.paCalls;
    10550         pReNative->fExec                      = pTb->fFlags & IEMTB_F_IEM_F_MASK;
    10551         while (cCallsLeft-- > 0)
    10552         {
    10553             PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
    10554 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    10555             pReNative->idxCurCall                 = idxCurCall;
    10556 #endif
    10557 
    10558             /*
    10559              * Debug info, assembly markup and statistics.
    10560              */
    10561 #if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
    10562             if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
    10563                 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
    10564 #endif
    10565 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    10566             iemNativeDbgInfoAddNativeOffset(pReNative, off);
    10567             if (iGstInstr < (int32_t)pCallEntry->idxInstr)
    10568             {
    10569                 if (iGstInstr < (int32_t)pTb->cInstructions)
    10570                     iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
    10571                 else
    10572                     Assert(iGstInstr == pTb->cInstructions);
    10573                 iGstInstr = pCallEntry->idxInstr;
    10574             }
    10575             iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
    10576 #endif
    10577 #if defined(VBOX_STRICT)
    10578             off = iemNativeEmitMarker(pReNative, off,
    10579                                       RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
    10580 #endif
    10581 #if defined(VBOX_STRICT)
    10582             iemNativeRegAssertSanity(pReNative);
    10583 #endif
    10584 #ifdef VBOX_WITH_STATISTICS
    10585             off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
    10586 #endif
    10587 
    10588             /*
    10589              * Actual work.
    10590              */
    10591             Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
    10592                   pfnRecom ? "(recompiled)" : "(todo)"));
    10593             if (pfnRecom) /** @todo stats on this.   */
    10594             {
    10595                 off = pfnRecom(pReNative, off, pCallEntry);
    10596                 STAM_REL_STATS({cRecompiledCalls++;});
    10597             }
    10598             else
    10599             {
    10600                 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
    10601                 STAM_REL_STATS({cThreadedCalls++;});
    10602             }
    10603             Assert(off <= pReNative->cInstrBufAlloc);
    10604             Assert(pReNative->cCondDepth == 0);
    10605 
    10606 #if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
    10607             if (LogIs2Enabled())
    10608             {
    10609                 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
    10610 # ifndef IEMLIVENESS_EXTENDED_LAYOUT
    10611                 static const char s_achState[] = "CUXI";
    10612 # else
    10613                 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
    10614 # endif
    10615 
    10616                 char szGpr[17];
    10617                 for (unsigned i = 0; i < 16; i++)
    10618                     szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
    10619                 szGpr[16] = '\0';
    10620 
    10621                 char szSegBase[X86_SREG_COUNT + 1];
    10622                 char szSegLimit[X86_SREG_COUNT + 1];
    10623                 char szSegAttrib[X86_SREG_COUNT + 1];
    10624                 char szSegSel[X86_SREG_COUNT + 1];
    10625                 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
    10626                 {
    10627                     szSegBase[i]   = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
    10628                     szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
    10629                     szSegLimit[i]  = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
    10630                     szSegSel[i]    = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
    10631                 }
    10632                 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
    10633                     = szSegSel[X86_SREG_COUNT] = '\0';
    10634 
    10635                 char szEFlags[8];
    10636                 for (unsigned i = 0; i < 7; i++)
    10637                     szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
    10638                 szEFlags[7] = '\0';
    10639 
    10640                 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
    10641                       szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
    10642             }
    10643 #endif
    10644 
    10645             /*
    10646              * Advance.
    10647              */
    10648             pCallEntry++;
    10649 #if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
    10650             idxCurCall++;
    10651 #endif
    10652         }
    10653 
    10654         STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
    10655         STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded,   cThreadedCalls);
    10656         if (!cThreadedCalls)
    10657             STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
    10658 
    10659         /*
    10660          * Emit the epilog code.
    10661          */
    10662         uint32_t idxReturnLabel;
    10663         off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
    10664 
    10665         /*
    10666          * Generate special jump labels.
    10667          */
    10668         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
    10669             off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
    10670         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
    10671             off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
    10672 
    10673         /*
     10674          * Generate simple TB tail labels that just call a helper with a pVCpu
     10675          * arg and either return or longjmp/throw a non-zero status.
    10676          *
    10677          * The array entries must be ordered by enmLabel value so we can index
    10678          * using fTailLabels bit numbers.
    10679          */
    10680         typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
    10681         static struct
    10682         {
    10683             IEMNATIVELABELTYPE              enmLabel;
    10684             PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
    10685         } const g_aSimpleTailLabels[] =
    10686         {
    10687             {   kIemNativeLabelType_Invalid,                NULL },
    10688             {   kIemNativeLabelType_RaiseDe,                iemNativeHlpExecRaiseDe },
    10689             {   kIemNativeLabelType_RaiseUd,                iemNativeHlpExecRaiseUd },
    10690             {   kIemNativeLabelType_RaiseSseRelated,        iemNativeHlpExecRaiseSseRelated },
    10691             {   kIemNativeLabelType_RaiseAvxRelated,        iemNativeHlpExecRaiseAvxRelated },
    10692             {   kIemNativeLabelType_RaiseSseAvxFpRelated,   iemNativeHlpExecRaiseSseAvxFpRelated },
    10693             {   kIemNativeLabelType_RaiseNm,                iemNativeHlpExecRaiseNm },
    10694             {   kIemNativeLabelType_RaiseGp0,               iemNativeHlpExecRaiseGp0 },
    10695             {   kIemNativeLabelType_RaiseMf,                iemNativeHlpExecRaiseMf },
    10696             {   kIemNativeLabelType_RaiseXf,                iemNativeHlpExecRaiseXf },
    10697             {   kIemNativeLabelType_ObsoleteTb,             iemNativeHlpObsoleteTb },
    10698             {   kIemNativeLabelType_NeedCsLimChecking,      iemNativeHlpNeedCsLimChecking },
    10699             {   kIemNativeLabelType_CheckBranchMiss,        iemNativeHlpCheckBranchMiss },
    10700         };
    10701         AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
    10702         AssertCompile(kIemNativeLabelType_Invalid == 0);
    10703         uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
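        /* (Mask arithmetic, spelled out: RT_BIT_64(LastSimple + 1) - 1 would select
           bits 0..LastSimple; subtracting 2 instead leaves bits 1..LastSimple set,
           i.e. everything except kIemNativeLabelType_Invalid at bit 0.) */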
    10704         if (fTailLabels)
    10705         {
    10706             do
    10707             {
    10708                 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
    10709                 fTailLabels &= ~RT_BIT_64(enmLabel);
    10710                 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
    10711 
    10712                 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
    10713                 Assert(idxLabel != UINT32_MAX);
    10714                 if (idxLabel != UINT32_MAX)
    10715                 {
    10716                     iemNativeLabelDefine(pReNative, idxLabel, off);
    10717 
    10718                     /* int pfnCallback(PVMCPUCC pVCpu) */
    10719                     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    10720                     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
    10721 
    10722                     /* jump back to the return sequence. */
    10723                     off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    10724                 }
    10725 
    10726             } while (fTailLabels);
    10727         }
    10728     }
    10729     IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
    10730     {
    10731         Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
    10732         return pTb;
    10733     }
    10734     IEMNATIVE_CATCH_LONGJMP_END(pReNative);
    10735     Assert(off <= pReNative->cInstrBufAlloc);
    10736 
    10737     /*
     10738      * Make sure all labels have been defined.
    10739      */
    10740     PIEMNATIVELABEL const paLabels = pReNative->paLabels;
    10741 #ifdef VBOX_STRICT
    10742     uint32_t const        cLabels  = pReNative->cLabels;
    10743     for (uint32_t i = 0; i < cLabels; i++)
    10744         AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
    10745 #endif
    10746 
    10747     /*
    10748      * Allocate executable memory, copy over the code we've generated.
    10749      */
    10750     PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
    10751     if (pTbAllocator->pDelayedFreeHead)
    10752         iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
    10753 
    10754     PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb);
    10755     AssertReturn(paFinalInstrBuf, pTb);
    10756     memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
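    /* (Fixups are applied to the final buffer below rather than to pInstrBuf, so the
       patched bytes land directly in the executable mapping; the
       iemExecMemAllocatorReadyForUse() call afterwards takes care of whatever W^X
       toggling and instruction-cache flushing the host requires.) */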
    10757 
    10758     /*
    10759      * Apply fixups.
    10760      */
    10761     PIEMNATIVEFIXUP const paFixups   = pReNative->paFixups;
    10762     uint32_t const        cFixups    = pReNative->cFixups;
    10763     for (uint32_t i = 0; i < cFixups; i++)
    10764     {
    10765         Assert(paFixups[i].off < off);
    10766         Assert(paFixups[i].idxLabel < cLabels);
    10767         AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
    10768                   ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
    10769                    paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
    10770         RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
    10771         switch (paFixups[i].enmType)
    10772         {
    10773 #if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    10774             case kIemNativeFixupType_Rel32:
    10775                 Assert(paFixups[i].off + 4 <= off);
    10776                 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    10777                 continue;
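                /* Example (hypothetical numbers): a label at byte offset 0x120 and a
                   fixup at 0x100 with the usual offAddend of -4 stores 0x1C, i.e. the
                   target measured from the end of the 32-bit displacement field, which
                   is how x86 rel32 operands are encoded. */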
    10778 
    10779 #elif defined(RT_ARCH_ARM64)
    10780             case kIemNativeFixupType_RelImm26At0:
    10781             {
    10782                 Assert(paFixups[i].off < off);
    10783                 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    10784                 Assert(offDisp >= -262144 && offDisp < 262144);
    10785                 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
    10786                 continue;
    10787             }
    10788 
    10789             case kIemNativeFixupType_RelImm19At5:
    10790             {
    10791                 Assert(paFixups[i].off < off);
    10792                 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    10793                 Assert(offDisp >= -262144 && offDisp < 262144);
    10794                 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
    10795                 continue;
    10796             }
    10797 
    10798             case kIemNativeFixupType_RelImm14At5:
    10799             {
    10800                 Assert(paFixups[i].off < off);
    10801                 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    10802                 Assert(offDisp >= -8192 && offDisp < 8192);
    10803                 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
    10804                 continue;
    10805             }
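            /* (All three ARM64 variants count offDisp in 32-bit instruction units and
               patch the branch immediate in place: B/BL keep a 26-bit immediate at
               bit 0, B.cond/CBZ/CBNZ a 19-bit one at bit 5, and TBZ/TBNZ a 14-bit
               one at bit 5 -- hence the three preserve masks above.) */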
    10806 
    10807 #endif
    10808             case kIemNativeFixupType_Invalid:
    10809             case kIemNativeFixupType_End:
    10810                 break;
    10811         }
    10812         AssertFailed();
    10813     }
    10814 
    10815     iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
    10816     STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
    10817 
    10818     /*
    10819      * Convert the translation block.
    10820      */
    10821     RTMemFree(pTb->Thrd.paCalls);
    10822     pTb->Native.paInstructions  = paFinalInstrBuf;
    10823     pTb->Native.cInstructions   = off;
    10824     pTb->fFlags                 = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
    10825 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
     10826     pTb->pDbgInfo               = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
    10827                                                       RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
    10828 #endif
    10829 
    10830     Assert(pTbAllocator->cThreadedTbs > 0);
    10831     pTbAllocator->cThreadedTbs -= 1;
    10832     pTbAllocator->cNativeTbs   += 1;
    10833     Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
    10834 
    10835 #ifdef LOG_ENABLED
    10836     /*
    10837      * Disassemble to the log if enabled.
    10838      */
    10839     if (LogIs3Enabled())
    10840     {
    10841         Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
    10842         iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
    10843 # if defined(DEBUG_bird) || defined(DEBUG_aeichner)
    10844         RTLogFlush(NULL);
    10845 # endif
    10846     }
    10847 #endif
    10848     /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
    10849 
    10850     STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
    10851     return pTb;
    10852 }
    10853 
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp

    r104114 r104115  
    6767#endif
    6868
    69 #ifdef RT_OS_WINDOWS
     70 # include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
    71 extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
    72 extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
    73 #else
    74 # include <iprt/formats/dwarf.h>
    75 # if defined(RT_OS_DARWIN)
    76 #  include <libkern/OSCacheControl.h>
    77 #  define IEMNATIVE_USE_LIBUNWIND
    78 extern "C" void  __register_frame(const void *pvFde);
    79 extern "C" void  __deregister_frame(const void *pvFde);
    80 # else
    81 #  ifdef DEBUG_bird /** @todo not thread safe yet */
    82 #   define IEMNATIVE_USE_GDB_JIT
    83 #  endif
    84 #  ifdef IEMNATIVE_USE_GDB_JIT
    85 #   include <iprt/critsect.h>
    86 #   include <iprt/once.h>
    87 #   include <iprt/formats/elf64.h>
    88 #  endif
    89 extern "C" void  __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
    90 extern "C" void *__deregister_frame_info(void *pvBegin);           /* (returns pvObj from __register_frame_info call) */
    91 # endif
    92 #endif
    9369#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    9470# include "/opt/local/include/capstone/capstone.h"
     
    137113DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
    138114
    139 
    140 /*********************************************************************************************************************************
    141 *   Executable Memory Allocator                                                                                                  *
    142 *********************************************************************************************************************************/
    143 /** The chunk sub-allocation unit size in bytes. */
    144 #define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE      128
    145 /** The chunk sub-allocation unit size as a shift factor. */
    146 #define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT     7
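/* (The two must agree: 128 == 1 << 7, so a cb byte request occupies
   RT_ALIGN_32(cb, 128) >> 7 bits in the allocation bitmap -- illustrative
   arithmetic, the shift simply being log2 of the unit size.) */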
    147 /** Enables adding a header to the sub-allocator allocations.
    148  * This is useful for freeing up executable memory among other things.  */
    149 #define IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
    150 /** Use alternative pruning. */
    151 #define IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
    152 
    153 
    154 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    155 # ifdef IEMNATIVE_USE_GDB_JIT
    156 #   define IEMNATIVE_USE_GDB_JIT_ET_DYN
    157 
    158 /** GDB JIT: Code entry.   */
    159 typedef struct GDBJITCODEENTRY
    160 {
    161     struct GDBJITCODEENTRY *pNext;
    162     struct GDBJITCODEENTRY *pPrev;
    163     uint8_t                *pbSymFile;
    164     uint64_t                cbSymFile;
    165 } GDBJITCODEENTRY;
    166 
    167 /** GDB JIT: Actions. */
    168 typedef enum GDBJITACTIONS : uint32_t
    169 {
    170     kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
    171 } GDBJITACTIONS;
    172 
    173 /** GDB JIT: Descriptor. */
    174 typedef struct GDBJITDESCRIPTOR
    175 {
    176     uint32_t            uVersion;
    177     GDBJITACTIONS       enmAction;
    178     GDBJITCODEENTRY    *pRelevant;
    179     GDBJITCODEENTRY    *pHead;
    180     /** Our addition: */
    181     GDBJITCODEENTRY    *pTail;
    182 } GDBJITDESCRIPTOR;
    183 
    184 /** GDB JIT: Our simple symbol file data. */
    185 typedef struct GDBJITSYMFILE
    186 {
    187     Elf64_Ehdr          EHdr;
    188 #  ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
    189     Elf64_Shdr          aShdrs[5];
    190 #  else
    191     Elf64_Shdr          aShdrs[7];
    192     Elf64_Phdr          aPhdrs[2];
    193 #  endif
    194     /** The dwarf ehframe data for the chunk. */
    195     uint8_t             abEhFrame[512];
    196     char                szzStrTab[128];
    197     Elf64_Sym           aSymbols[3];
    198 #  ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    199     Elf64_Sym           aDynSyms[2];
    200     Elf64_Dyn           aDyn[6];
    201 #  endif
    202 } GDBJITSYMFILE;
    203 
    204 extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
    205 extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
    206 
    207 /** Init once for g_IemNativeGdbJitLock. */
    208 static RTONCE     g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
    209 /** Init once for the critical section. */
    210 static RTCRITSECT g_IemNativeGdbJitLock;
    211 
    212 /** GDB reads the info here. */
    213 GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
    214 
    215 /** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
    216 DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
    217 {
    218     ASMNopPause();
    219 }
    220 
    221 /** @callback_method_impl{FNRTONCE} */
    222 static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
    223 {
    224     RT_NOREF(pvUser);
    225     return RTCritSectInit(&g_IemNativeGdbJitLock);
    226 }
    227 
    228 
    229 # endif /* IEMNATIVE_USE_GDB_JIT */
    230 
    231 /**
    232  * Per-chunk unwind info for non-windows hosts.
    233  */
    234 typedef struct IEMEXECMEMCHUNKEHFRAME
    235 {
    236 # ifdef IEMNATIVE_USE_LIBUNWIND
    237     /** The offset of the FDA into abEhFrame. */
    238     uintptr_t               offFda;
    239 # else
    240     /** 'struct object' storage area. */
    241     uint8_t                 abObject[1024];
    242 # endif
    243 #  ifdef IEMNATIVE_USE_GDB_JIT
    244 #   if 0
    245     /** The GDB JIT 'symbol file' data. */
    246     GDBJITSYMFILE           GdbJitSymFile;
    247 #   endif
    248     /** The GDB JIT list entry. */
    249     GDBJITCODEENTRY         GdbJitEntry;
    250 #  endif
    251     /** The dwarf ehframe data for the chunk. */
    252     uint8_t                 abEhFrame[512];
    253 } IEMEXECMEMCHUNKEHFRAME;
     254 /** Pointer to per-chunk unwind info for non-windows hosts. */
    255 typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
    256 #endif
    257 
    258 
    259 /**
     260  * A chunk of executable memory.
    261  */
    262 typedef struct IEMEXECMEMCHUNK
    263 {
    264     /** Number of free items in this chunk. */
    265     uint32_t                cFreeUnits;
     266     /** Hint where to start searching for free space in the allocation bitmap. */
    267     uint32_t                idxFreeHint;
    268     /** Pointer to the chunk. */
    269     void                   *pvChunk;
    270 #ifdef IN_RING3
    271     /**
    272      * Pointer to the unwind information.
    273      *
    274      * This is used during C++ throw and longjmp (windows and probably most other
     275      * platforms).  Some debuggers (windbg) make use of it as well.
    276      *
    277      * Windows: This is allocated from hHeap on windows because (at least for
    278      *          AMD64) the UNWIND_INFO structure address in the
    279      *          RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
    280      *
    281      * Others:  Allocated from the regular heap to avoid unnecessary executable data
    282      *          structures.  This points to an IEMEXECMEMCHUNKEHFRAME structure. */
    283     void                   *pvUnwindInfo;
    284 #elif defined(IN_RING0)
    285     /** Allocation handle. */
    286     RTR0MEMOBJ              hMemObj;
    287 #endif
    288 } IEMEXECMEMCHUNK;
    289 /** Pointer to a memory chunk. */
    290 typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
    291 
    292 
    293 /**
    294  * Executable memory allocator for the native recompiler.
    295  */
    296 typedef struct IEMEXECMEMALLOCATOR
    297 {
    298     /** Magic value (IEMEXECMEMALLOCATOR_MAGIC).  */
    299     uint32_t                uMagic;
    300 
    301     /** The chunk size. */
    302     uint32_t                cbChunk;
    303     /** The maximum number of chunks. */
    304     uint32_t                cMaxChunks;
    305     /** The current number of chunks. */
    306     uint32_t                cChunks;
    307     /** Hint where to start looking for available memory. */
    308     uint32_t                idxChunkHint;
    309     /** Statistics: Current number of allocations. */
    310     uint32_t                cAllocations;
    311 
    312     /** The total amount of memory available. */
    313     uint64_t                cbTotal;
    314     /** Total amount of free memory. */
    315     uint64_t                cbFree;
    316     /** Total amount of memory allocated. */
    317     uint64_t                cbAllocated;
    318 
    319     /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
    320      *
    321      * Since the chunk size is a power of two and the minimum chunk size is a lot
    322      * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
    323      * require a whole number of uint64_t elements in the allocation bitmap.  So,
     324      * for simplicity/laziness, they are allocated as one contiguous
     325      * chunk. */
    326     uint64_t               *pbmAlloc;
    327     /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
    328     uint32_t                cUnitsPerChunk;
    329     /** Number of bitmap elements per chunk (for quickly locating the bitmap
     330      * portion corresponding to a chunk). */
    331     uint32_t                cBitmapElementsPerChunk;
    332 
    333 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
    334     /** The next chunk to prune in. */
    335     uint32_t                idxChunkPrune;
    336     /** Where in chunk offset to start pruning at. */
    337     uint32_t                offChunkPrune;
    338     /** Profiling the pruning code. */
    339     STAMPROFILE             StatPruneProf;
    340     /** Number of bytes recovered by the pruning. */
    341     STAMPROFILE             StatPruneRecovered;
    342 #endif
    343 
    344 #ifdef VBOX_WITH_STATISTICS
    345     STAMPROFILE             StatAlloc;
    346 #endif
    347 
    348 
    349 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    350     /** Pointer to the array of unwind info running parallel to aChunks (same
    351      * allocation as this structure, located after the bitmaps).
    352      * (For Windows, the structures must reside within 32-bit RVA distance of
    353      * the actual chunk, so they are allocated off the chunk.)
    354     PIEMEXECMEMCHUNKEHFRAME paEhFrames;
    355 #endif
    356 
    357     /** The allocation chunks. */
    358     RT_FLEXIBLE_ARRAY_EXTENSION
    359     IEMEXECMEMCHUNK         aChunks[RT_FLEXIBLE_ARRAY];
    360 } IEMEXECMEMALLOCATOR;
    361 /** Pointer to an executable memory allocator. */
    362 typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
    363 
    364 /** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
    365 #define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
    366 
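/* Worked sizing example (illustrative only; IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE
 * is defined elsewhere, a 256 byte unit is assumed here purely for the numbers):
 * with a 64 MiB chunk and 256 byte sub-allocation units, each chunk covers
 * 64M / 256 = 262144 units, tracked by 262144 / 64 = 4096 uint64_t bitmap
 * elements, i.e. 32 KiB of bitmap per chunk.  Both factors being powers of
 * two is what guarantees the whole-uint64_t property noted for pbmAlloc. */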
    367 
    368 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
    369 /**
    370  * Allocation header.
    371  */
    372 typedef struct IEMEXECMEMALLOCHDR
    373 {
    374     /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */
    375     uint32_t        uMagic;
    376     /** The allocation chunk (for speeding up freeing). */
    377     uint32_t        idxChunk;
    378     /** Pointer to the translation block the allocation belongs to.
    379      * This is the whole point of the header. */
    380     PIEMTB          pTb;
    381 } IEMEXECMEMALLOCHDR;
    382 /** Pointer to an allocation header. */
    383 typedef IEMEXECMEMALLOCHDR *PIEMEXECMEMALLOCHDR;
    384 /** Magic value for IEMEXECMEMALLOCHDR ('ExeM'). */
    385 # define IEMEXECMEMALLOCHDR_MAGIC       UINT32_C(0x4d657845)
    386 #endif
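/* Block layout with the allocation header enabled (illustrative sketch):
 *
 *    +--------------------+-------------------------------------+
 *    | IEMEXECMEMALLOCHDR | native code (pTb->Native)           |
 *    +--------------------+-------------------------------------+
 *    ^ unit aligned        ^ pointer handed back to the caller
 *
 * iemExecMemAllocatorFree() steps back one header from the caller's pointer
 * to recover idxChunk, and the pruning code relies on the uMagic/idxChunk
 * pair to tell live headers apart from arbitrary code bytes. */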
    387 
    388 
    389 static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
    390 
    391 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
    392 /**
    393  * Frees up executable memory when we're out of space.
    394  *
    395  * This is an alternative to iemTbAllocatorFreeupNativeSpace() that frees up
    396  * space in a more linear fashion from the allocator's point of view.  It may
    397  * also defragment if implemented & enabled.
    398  */
    399 static void iemExecMemAllocatorPrune(PVMCPU pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
    400 {
    401 # ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
    402 #  error "IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING requires IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER"
    403 # endif
    404     STAM_REL_PROFILE_START(&pExecMemAllocator->StatPruneProf, a);
    405 
    406     /*
    407      * Before we can start, we must process delayed frees.
    408      */
    409     iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
    410 
    411     AssertCompile(RT_IS_POWER_OF_TWO(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE));
    412 
    413     uint32_t const cbChunk = pExecMemAllocator->cbChunk;
    414     AssertReturnVoid(RT_IS_POWER_OF_TWO(cbChunk));
    415     AssertReturnVoid(cbChunk >= _1M && cbChunk <= _256M); /* see iemExecMemAllocatorInit */
    416 
    417     uint32_t const cChunks = pExecMemAllocator->cChunks;
    418     AssertReturnVoid(cChunks == pExecMemAllocator->cMaxChunks);
    419     AssertReturnVoid(cChunks >= 1);
    420 
    421     /*
    422      * Decide how much to prune.  The chunk size is a power of two, so the
    423      * amount we scan here will be a power of two as well.
    424      */
    425     uint32_t cbToPrune = cbChunk;
    426 
    427     /* Never more than 25%. */
    428     if (cChunks < 4)
    429         cbToPrune /= cChunks == 1 ? 4 : 2;
    430 
    431     /* Upper limit. In a debug build a 4MB limit averages out at ~0.6ms per call. */
    432     if (cbToPrune > _4M)
    433         cbToPrune = _4M;
    434 
    435     /*
    436      * Adjust the pruning chunk and offset accordingly.
    437      */
    438     uint32_t idxChunk = pExecMemAllocator->idxChunkPrune;
    439     uint32_t offChunk = pExecMemAllocator->offChunkPrune;
    440     offChunk &= ~(uint32_t)(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1U);
    441     if (offChunk >= cbChunk)
    442     {
    443         offChunk = 0;
    444         idxChunk += 1;
    445     }
    446     if (idxChunk >= cChunks)
    447     {
    448         offChunk = 0;
    449         idxChunk = 0;
    450     }
    451 
    452     uint32_t const offPruneEnd = RT_MIN(offChunk + cbToPrune, cbChunk);
    453 
    454     /*
    455      * Do the pruning.  The current approach is the severe kind.
    456      */
    457     uint64_t            cbPruned = 0;
    458     uint8_t * const     pbChunk  = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunk;
    459     while (offChunk < offPruneEnd)
    460     {
    461         PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)&pbChunk[offChunk];
    462 
    463         /* Is this the start of an allocation block for a TB? (We typically have
    464            one allocation at the start of each chunk for the unwind info where
    465            pTb is NULL.)  */
    466         if (   pHdr->uMagic   == IEMEXECMEMALLOCHDR_MAGIC
    467             && pHdr->pTb      != NULL
    468             && pHdr->idxChunk == idxChunk)
    469         {
    470             PIEMTB const pTb = pHdr->pTb;
    471             AssertPtr(pTb);
    472             Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
    473 
    474             uint32_t const cbBlock = RT_ALIGN_32(pTb->Native.cInstructions * sizeof(IEMNATIVEINSTR) + sizeof(*pHdr),
    475                                                  IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    476             AssertBreakStmt(offChunk + cbBlock <= cbChunk, offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); /* paranoia */
    477 
    478             iemTbAllocatorFree(pVCpu, pTb);
    479 
    480             cbPruned += cbBlock;
    481             offChunk += cbBlock;
    482         }
    483         else
    484             offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
    485     }
    486     STAM_REL_PROFILE_ADD_PERIOD(&pExecMemAllocator->StatPruneRecovered, cbPruned);
    487 
    488     /*
    489      * Save the current pruning point.
    490      */
    491     pExecMemAllocator->offChunkPrune = offChunk;
    492     pExecMemAllocator->idxChunkPrune = idxChunk;
    493 
    494     STAM_REL_PROFILE_STOP(&pExecMemAllocator->StatPruneProf, a);
    495 }
    496 #endif /* IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING */
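/* Prune sizing example (illustrative numbers): with cbChunk = 64 MiB and a
 * single chunk, cbToPrune starts at 64 MiB, the 25% rule cuts it to 16 MiB,
 * and the upper limit clamps it to 4 MiB, so each call scans at most 4 MiB
 * of one chunk before the resume point is saved in idxChunkPrune and
 * offChunkPrune for the next invocation. */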
    497 
    498 
    499 /**
    500  * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
    501  * the heap statistics.
    502  */
    503 static void *iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
    504                                               uint32_t cbReq, uint32_t idxChunk)
    505 {
    506     pExecMemAllocator->cAllocations += 1;
    507     pExecMemAllocator->cbAllocated  += cbReq;
    508     pExecMemAllocator->cbFree       -= cbReq;
    509     pExecMemAllocator->idxChunkHint  = idxChunk;
    510 
    511 #ifdef RT_OS_DARWIN
    512     /*
    513      * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
    514      * on darwin.  So, we mark the pages returned as read+write after alloc and
    515      * expect the caller to call iemExecMemAllocatorReadyForUse when done
    516      * writing to the allocation.
    517      *
    518      * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
    519      * for details.
    520      */
    521     /** @todo detect if this is necessary... it wasn't required on 10.15 or
    522      *        whatever older version it was. */
    523     int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
    524     AssertRC(rc);
    525 #endif
    526 
    527     return pvRet;
    528 }
    529 
    530 
    531 static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
    532                                                 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk, PIEMTB pTb)
    533 {
    534     /*
    535      * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
    536      */
    537     Assert(!(cToScan & 63));
    538     Assert(!(idxFirst & 63));
    539     Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
    540     pbmAlloc += idxFirst / 64;
    541 
    542     /*
    543      * Scan the bitmap for cReqUnits consecutive clear bits.
    544      */
    545     /** @todo This can probably be done more efficiently for non-x86 systems. */
    546     int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
    547     while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
    548     {
    549         uint32_t idxAddBit = 1;
    550         while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
    551             idxAddBit++;
    552         if (idxAddBit >= cReqUnits)
    553         {
    554             ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
    555 
    556             PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
    557             pChunk->cFreeUnits -= cReqUnits;
    558             pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
    559 
    560 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
    561             PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)((uint8_t *)pChunk->pvChunk
    562                                                              + (   (idxFirst + (uint32_t)iBit)
    563                                                                 << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT));
    564             pHdr->uMagic   = IEMEXECMEMALLOCHDR_MAGIC;
    565             pHdr->idxChunk = idxChunk;
    566             pHdr->pTb      = pTb;
    567             return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pHdr + 1,
    568                                                     cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
    569 #else
    570             RT_NOREF(pTb);
    571             void * const pvRet  = (uint8_t *)pChunk->pvChunk
    572                                 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
    573             return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
    574                                                     cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
    575 #endif
    576         }
    577 
    578         iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
    579     }
    580     return NULL;
    581 }
    582 
    583 
    584 static void *
    585 iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq, PIEMTB pTb)
    586 {
    587     /*
    588      * Figure out how much to allocate.
    589      */
    590 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
    591     uint32_t const cReqUnits = (cbReq + sizeof(IEMEXECMEMALLOCHDR) + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
    592 #else
    593     uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
    594 #endif
    595                             >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    596     if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
    597     {
    598         uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
    599         uint32_t const   idxHint  = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
    600         if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
    601         {
    602             void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
    603                                                              pExecMemAllocator->cUnitsPerChunk - idxHint,
    604                                                              cReqUnits, idxChunk, pTb);
    605             if (pvRet)
    606                 return pvRet;
    607         }
    608         return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
    609                                                   RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
    610                                                   cReqUnits, idxChunk, pTb);
    611     }
    612     return NULL;
    613 }
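/* Request sizing example (illustrative; assumes the 16 byte allocation header
 * on 64-bit hosts and, purely for the arithmetic, a 256 byte allocation
 * unit): a cbReq of 1000 bytes grows to 1016 bytes with the header and rounds
 * up to cReqUnits = 4, i.e. 1024 bytes of chunk space and four consecutive
 * clear bits in the chunk's allocation bitmap. */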
    614 
    615 
    616 /**
    617  * Allocates @a cbReq bytes of executable memory.
    618  *
    619  * @returns Pointer to the memory, NULL if out of memory or other problem
    620  *          encountered.
    621  * @param   pVCpu   The cross context virtual CPU structure of the calling
    622  *                  thread.
    623  * @param   cbReq   How many bytes are required.
    624  * @param   pTb     The translation block that will be using the allocation.
    625  */
    626 static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb)
    627 {
    628     PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
    629     AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
    630     AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
    631     STAM_PROFILE_START(&pExecMemAllocator->StatAlloc, a);
    632 
    633     for (unsigned iIteration = 0;; iIteration++)
    634     {
    635         if (cbReq <= pExecMemAllocator->cbFree)
    636         {
    637             uint32_t const cChunks      = pExecMemAllocator->cChunks;
    638             uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
    639             for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
    640             {
    641                 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
    642                 if (pvRet)
    643                 {
    644                     STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
    645                     return pvRet;
    646                 }
    647             }
    648             for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
    649             {
    650                 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
    651                 if (pvRet)
    652                 {
    653                     STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
    654                     return pvRet;
    655                 }
    656             }
    657         }
    658 
    659         /*
    660          * Can we grow it with another chunk?
    661          */
    662         if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
    663         {
    664             int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
    665             AssertLogRelRCReturn(rc, NULL);
    666 
    667             uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
    668             void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
    669             if (pvRet)
    670             {
    671                 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
    672                 return pvRet;
    673             }
    674             AssertFailed();
    675         }
    676 
    677         /*
    678          * Try pruning native TBs once.
    679          */
    680         if (iIteration == 0)
    681         {
    682 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
    683             iemExecMemAllocatorPrune(pVCpu, pExecMemAllocator);
    684 #else
    685             /* No header included in the instruction count here. */
    686             uint32_t const cNeededInstrs = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) / sizeof(IEMNATIVEINSTR);
    687             iemTbAllocatorFreeupNativeSpace(pVCpu, cNeededInstrs);
    688 #endif
    689         }
    690         else
    691         {
    692             STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeExecMemInstrBufAllocFailed);
    693             STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
    694             return NULL;
    695         }
    696     }
    697 }
    698 
    699 
    700 /** This is a hook that we may need later for changing memory protection back
    701  *  to readonly+exec. */
    702 static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
    703 {
    704 #ifdef RT_OS_DARWIN
    705     /* See iemExecMemAllocatorAllocTailCode for the explanation. */
    706     int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
    707     AssertRC(rc); RT_NOREF(pVCpu);
    708 
    709     /*
    710      * Flush the instruction cache:
    711      *      https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
    712      */
    713     /* sys_dcache_flush(pv, cb); - not necessary */
    714     sys_icache_invalidate(pv, cb);
    715 #else
    716     RT_NOREF(pVCpu, pv, cb);
    717 #endif
    718 }
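/* Usage sketch (hypothetical caller, for illustration only): the recompiler
 * obtains a buffer via iemExecMemAllocatorAlloc(), emits native instructions
 * into it while the pages are still read+write (the darwin case above), and
 * only then calls iemExecMemAllocatorReadyForUse() to flip the pages to
 * read+exec and invalidate the instruction cache before the translation
 * block is executed. */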
    719 
    720 
    721 /**
    722  * Frees executable memory.
    723  */
    724 void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
    725 {
    726     PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
    727     Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
    728     AssertPtr(pv);
    729 #ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
    730     Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
    731 
    732     /* Align the size as we did when allocating the block. */
    733     cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    734 
    735 #else
    736     PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)pv - 1;
    737     Assert(!((uintptr_t)pHdr & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
    738     AssertReturnVoid(pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC);
    739     uint32_t const idxChunk = pHdr->idxChunk;
    740     AssertReturnVoid(idxChunk < pExecMemAllocator->cChunks);
    741     pv = pHdr;
    742 
    743     /* Adjust and align the size to cover the whole allocation area. */
    744     cb = RT_ALIGN_Z(cb + sizeof(*pHdr), IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    745 #endif
    746 
    747     /* Free it / assert sanity. */
    748     bool           fFound  = false;
    749     uint32_t const cbChunk = pExecMemAllocator->cbChunk;
    750 #ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
    751     uint32_t const cChunks = pExecMemAllocator->cChunks;
    752     for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
    753 #endif
    754     {
    755         uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
    756         fFound = offChunk < cbChunk;
    757         if (fFound)
    758         {
    759             uint32_t const idxFirst  = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    760             uint32_t const cReqUnits = (uint32_t)cb       >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    761 
    762             /* Check that it's valid and free it. */
    763             uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
    764             AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
    765             for (uint32_t i = 1; i < cReqUnits; i++)
    766                 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
    767             ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
    768 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
    769             pHdr->uMagic    = 0;
    770             pHdr->idxChunk  = 0;
    771             pHdr->pTb       = NULL;
    772 #endif
    773             pExecMemAllocator->aChunks[idxChunk].cFreeUnits  += cReqUnits;
    774             pExecMemAllocator->aChunks[idxChunk].idxFreeHint  = idxFirst;
    775 
    776             /* Update the stats. */
    777             pExecMemAllocator->cbAllocated  -= cb;
    778             pExecMemAllocator->cbFree       += cb;
    779             pExecMemAllocator->cAllocations -= 1;
    780             return;
    781         }
    782     }
    783     AssertFailed();
    784 }
    785 
    786 
    787 
    788 #ifdef IN_RING3
    789 # ifdef RT_OS_WINDOWS
    790 
    791 /**
    792  * Initializes the unwind info structures for windows hosts.
    793  */
    794 static int
    795 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
    796                                                      void *pvChunk, uint32_t idxChunk)
    797 {
    798     RT_NOREF(pVCpu);
    799 
    800     /*
    801      * The AMD64 unwind opcodes.
    802      *
    803      * This is a program that starts with RSP after a RET instruction that
    804      * ends up in recompiled code, and the operations we describe here will
    805      * restore all non-volatile registers and bring RSP back to where our
    806      * RET address is.  This means it's reverse order from what happens in
    807      * the prologue.
    808      *
    809      * Note! Using a frame register approach here, both because we have one
    810      *       and mainly because the UWOP_ALLOC_LARGE argument values would
    811      *       be a pain to write initializers for.  On the positive side,
    812      *       we're impervious to changes in the stack variable area and can
    813      *       deal with dynamic stack allocations if necessary.
    814      */
    815     static const IMAGE_UNWIND_CODE s_aOpcodes[] =
    816     {
    817         { { 16, IMAGE_AMD64_UWOP_SET_FPREG,     0 } },              /* RSP  = RBP - FrameOffset * 0x10 (0x60) */
    818         { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL,   0 } },              /* RSP += 8; */
    819         { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x15 } },   /* R15  = [RSP]; RSP += 8; */
    820         { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x14 } },   /* R14  = [RSP]; RSP += 8; */
    821         { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x13 } },   /* R13  = [RSP]; RSP += 8; */
    822         { {  8, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x12 } },   /* R12  = [RSP]; RSP += 8; */
    823         { {  7, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xDI } },   /* RDI  = [RSP]; RSP += 8; */
    824         { {  6, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xSI } },   /* RSI  = [RSP]; RSP += 8; */
    825         { {  5, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xBX } },   /* RBX  = [RSP]; RSP += 8; */
    826         { {  4, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xBP } },   /* RBP  = [RSP]; RSP += 8; */
    827     };
    828     union
    829     {
    830         IMAGE_UNWIND_INFO Info;
    831         uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
    832     } s_UnwindInfo =
    833     {
    834         {
    835             /* .Version = */        1,
    836             /* .Flags = */          0,
    837             /* .SizeOfProlog = */   16, /* whatever */
    838             /* .CountOfCodes = */   RT_ELEMENTS(s_aOpcodes),
    839             /* .FrameRegister = */  X86_GREG_xBP,
    840             /* .FrameOffset = */    (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
    841         }
    842     };
    843     AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
    844     AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
    845 
    846     /*
    847      * Calc how much space we need and allocate it off the exec heap.
    848      */
    849     unsigned const cFunctionEntries = 1;
    850     unsigned const cbUnwindInfo     = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
    851     unsigned const cbNeeded         = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
    852     PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
    853         = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeeded, NULL);
    854     AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
    855     pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
    856 
    857     /*
    858      * Initialize the structures.
    859      */
    860     PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
    861 
    862     paFunctions[0].BeginAddress         = 0;
    863     paFunctions[0].EndAddress           = pExecMemAllocator->cbChunk;
    864     paFunctions[0].UnwindInfoAddress    = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
    865 
    866     memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
    867     memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
    868 
    869     /*
    870      * Register it.
    871      */
    872     uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
    873     AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
    874 
    875     return VINF_SUCCESS;
    876 }
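/* RVA note (follows from the code above, stated here for clarity): because
 * BeginAddress is 0 and EndAddress is cbChunk, the single RUNTIME_FUNCTION
 * entry matches every PC inside the chunk, and UnwindInfoAddress is simply
 * the byte offset of pInfo within the chunk, which is why the unwind info
 * must be allocated from the chunk itself to stay within 32-bit RVA reach. */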
    877 
    878 
    879 # else /* !RT_OS_WINDOWS */
    880 
    881 /**
    882  * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
    883  */
    884 DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
    885 {
    886     if (iValue >= 64)
    887     {
    888         Assert(iValue < 0x2000);
    889         *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
    890         *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
    891     }
    892     else if (iValue >= 0)
    893         *Ptr.pb++ = (uint8_t)iValue;
    894     else if (iValue > -64)
    895         *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
    896     else
    897     {
    898         Assert(iValue > -0x2000);
    899         *Ptr.pb++ = ((uint8_t)iValue & 0x7f)        | 0x80;
    900         *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
    901     }
    902     return Ptr;
    903 }
    904 
    905 
    906 /**
    907  * Emits a ULEB128 encoded value (up to 64-bit wide).
    908  */
    909 DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
    910 {
    911     while (uValue >= 0x80)
    912     {
    913         *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
    914         uValue  >>= 7;
    915     }
    916     *Ptr.pb++ = (uint8_t)uValue;
    917     return Ptr;
    918 }
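/* Encoding examples (standard DWARF LEB128 values, given for reference):
 * feeding 624485 to iemDwarfPutUleb128 emits the bytes 0xe5 0x8e 0x26, while
 * feeding -8 to iemDwarfPutLeb128 emits the single byte 0x78 (sign bit 0x40
 * set, no continuation bit).  Values in [0,127] resp. [-64,63] cost one byte,
 * which keeps the CFA programs below compact. */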
    919 
    920 
    921 /**
    922  * Emits a CFA rule as register @a uReg + offset @a off.
    923  */
    924 DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
    925 {
    926     *Ptr.pb++ = DW_CFA_def_cfa;
    927     Ptr = iemDwarfPutUleb128(Ptr, uReg);
    928     Ptr = iemDwarfPutUleb128(Ptr, off);
    929     return Ptr;
    930 }
    931 
    932 
    933 /**
    934  * Emits a register (@a uReg) save location:
    935  *      CFA + @a off * data_alignment_factor
    936  */
    937 DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
    938 {
    939     if (uReg < 0x40)
    940         *Ptr.pb++ = DW_CFA_offset | uReg;
    941     else
    942     {
    943         *Ptr.pb++ = DW_CFA_offset_extended;
    944         Ptr = iemDwarfPutUleb128(Ptr, uReg);
    945     }
    946     Ptr = iemDwarfPutUleb128(Ptr, off);
    947     return Ptr;
    948 }
    949 
    950 
    951 #  if 0 /* unused */
    952 /**
    953  * Emits a register (@a uReg) save location, using signed offset:
    954  *      CFA + @a offSigned * data_alignment_factor
    955  */
    956 DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
    957 {
    958     *Ptr.pb++ = DW_CFA_offset_extended_sf;
    959     Ptr = iemDwarfPutUleb128(Ptr, uReg);
    960     Ptr = iemDwarfPutLeb128(Ptr, offSigned);
    961     return Ptr;
    962 }
    963 #  endif
    964 
    965 
    966 /**
    967  * Initializes the unwind info section for non-windows hosts.
    968  */
    969 static int
    970 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
    971                                                      void *pvChunk, uint32_t idxChunk)
    972 {
    973     PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
    974     pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
    975 
    976     RTPTRUNION Ptr = { pEhFrame->abEhFrame };
    977 
    978     /*
    979      * Generate the CIE first.
    980      */
    981 #  ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
    982     uint8_t const iDwarfVer = 3;
    983 #  else
    984     uint8_t const iDwarfVer = 4;
    985 #  endif
    986     RTPTRUNION const PtrCie = Ptr;
    987     *Ptr.pu32++ = 123;                                      /* The CIE length will be determined later. */
    988     *Ptr.pu32++ = 0 /*UINT32_MAX*/;                         /* I'm a CIE in .eh_frame speak. */
    989     *Ptr.pb++   = iDwarfVer;                                /* DWARF version */
    990     *Ptr.pb++   = 0;                                        /* Augmentation. */
    991     if (iDwarfVer >= 4)
    992     {
    993         *Ptr.pb++   = sizeof(uintptr_t);                    /* Address size. */
    994         *Ptr.pb++   = 0;                                    /* Segment selector size. */
    995     }
    996 #  ifdef RT_ARCH_AMD64
    997     Ptr = iemDwarfPutLeb128(Ptr, 1);                        /* Code alignment factor (LEB128 = 1). */
    998 #  else
    999     Ptr = iemDwarfPutLeb128(Ptr, 4);                        /* Code alignment factor (LEB128 = 4). */
    1000 #  endif
    1001     Ptr = iemDwarfPutLeb128(Ptr, -8);                       /* Data alignment factor (LEB128 = -8). */
    1002 #  ifdef RT_ARCH_AMD64
    1003     Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA);          /* Return address column (ULEB128) */
    1004 #  elif defined(RT_ARCH_ARM64)
    1005     Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR);          /* Return address column (ULEB128) */
    1006 #  else
    1007 #   error "port me"
    1008 #  endif
    1009     /* Initial instructions: */
    1010 #  ifdef RT_ARCH_AMD64
    1011     Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16);   /* CFA     = RBP + 0x10 - first stack parameter */
    1012     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,  1);    /* Ret RIP = [CFA + 1*-8] */
    1013     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2);    /* RBP     = [CFA + 2*-8] */
    1014     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3);    /* RBX     = [CFA + 3*-8] */
    1015     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4);    /* R12     = [CFA + 4*-8] */
    1016     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5);    /* R13     = [CFA + 5*-8] */
    1017     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6);    /* R14     = [CFA + 6*-8] */
    1018     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7);    /* R15     = [CFA + 7*-8] */
    1019 #  elif defined(RT_ARCH_ARM64)
    1020 #   if 1
    1021     Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP,  16);   /* CFA     = BP + 0x10 - first stack parameter */
    1022 #   else
    1023     Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP,  IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
    1024 #   endif
    1025     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR,   1);   /* Ret PC  = [CFA + 1*-8] */
    1026     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP,   2);   /* Ret BP  = [CFA + 2*-8] */
    1027     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28,  3);   /* X28     = [CFA + 3*-8] */
    1028     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27,  4);   /* X27     = [CFA + 4*-8] */
    1029     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26,  5);   /* X26     = [CFA + 5*-8] */
    1030     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25,  6);   /* X25     = [CFA + 6*-8] */
    1031     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24,  7);   /* X24     = [CFA + 7*-8] */
    1032     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23,  8);   /* X23     = [CFA + 8*-8] */
    1033     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22,  9);   /* X22     = [CFA + 9*-8] */
    1034     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10);   /* X21     = [CFA +10*-8] */
    1035     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11);   /* X20     = [CFA +11*-8] */
    1036     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12);   /* X19     = [CFA +12*-8] */
    1037     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    1038     /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
    1039 #  else
    1040 #   error "port me"
    1041 #  endif
    1042     while ((Ptr.u - PtrCie.u) & 3)
    1043         *Ptr.pb++ = DW_CFA_nop;
    1044     /* Finalize the CIE size. */
    1045     *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
    1046 
    1047     /*
    1048      * Generate an FDE for the whole chunk area.
    1049      */
    1050 #  ifdef IEMNATIVE_USE_LIBUNWIND
    1051     pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
    1052 #  endif
    1053     RTPTRUNION const PtrFde = Ptr;
    1054     *Ptr.pu32++ = 123;                                      /* The FDE length will be determined later. */
    1055     *Ptr.pu32   = Ptr.u - PtrCie.u;                         /* Negated self relative CIE address. */
    1056     Ptr.pu32++;
    1057     *Ptr.pu64++ = (uintptr_t)pvChunk;                       /* Absolute start PC of this FDE. */
    1058     *Ptr.pu64++ = pExecMemAllocator->cbChunk;               /* PC range length for this FDE. */
    1059 #  if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
    1060     *Ptr.pb++ = DW_CFA_nop;
    1061 #  endif
    1062     while ((Ptr.u - PtrFde.u) & 3)
    1063         *Ptr.pb++ = DW_CFA_nop;
    1064     /* Finalize the FDE size. */
    1065     *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
    1066 
    1067     /* Terminator entry. */
    1068     *Ptr.pu32++ = 0;
    1069     *Ptr.pu32++ = 0;            /* just to be sure... */
    1070     Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
    1071 
    1072     /*
    1073      * Register it.
    1074      */
    1075 #  ifdef IEMNATIVE_USE_LIBUNWIND
    1076     __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
    1077 #  else
    1078     memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
    1079     __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
    1080 #  endif
    1081 
    1082 #  ifdef IEMNATIVE_USE_GDB_JIT
    1083     /*
    1084      * Now for telling GDB about this (experimental).
    1085      *
    1086      * This seems to work best with ET_DYN.
    1087      */
    1088     GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk,
    1089                                                                                       sizeof(GDBJITSYMFILE), NULL);
    1090     AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
    1091     unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
    1092 
    1093     RT_ZERO(*pSymFile);
    1094 
    1095     /*
    1096      * The ELF header:
    1097      */
    1098     pSymFile->EHdr.e_ident[0]           = ELFMAG0;
    1099     pSymFile->EHdr.e_ident[1]           = ELFMAG1;
    1100     pSymFile->EHdr.e_ident[2]           = ELFMAG2;
    1101     pSymFile->EHdr.e_ident[3]           = ELFMAG3;
    1102     pSymFile->EHdr.e_ident[EI_VERSION]  = EV_CURRENT;
    1103     pSymFile->EHdr.e_ident[EI_CLASS]    = ELFCLASS64;
    1104     pSymFile->EHdr.e_ident[EI_DATA]     = ELFDATA2LSB;
    1105     pSymFile->EHdr.e_ident[EI_OSABI]    = ELFOSABI_NONE;
    1106 #   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    1107     pSymFile->EHdr.e_type               = ET_DYN;
    1108 #   else
    1109     pSymFile->EHdr.e_type               = ET_REL;
    1110 #   endif
    1111 #   ifdef RT_ARCH_AMD64
    1112     pSymFile->EHdr.e_machine            = EM_AMD64;
    1113 #   elif defined(RT_ARCH_ARM64)
    1114     pSymFile->EHdr.e_machine            = EM_AARCH64;
    1115 #   else
    1116 #    error "port me"
    1117 #   endif
    1118     pSymFile->EHdr.e_version            = 1; /*?*/
    1119     pSymFile->EHdr.e_entry              = 0;
    1120 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1121     pSymFile->EHdr.e_phoff              = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
    1122 #   else
    1123     pSymFile->EHdr.e_phoff              = 0;
    1124 #   endif
    1125     pSymFile->EHdr.e_shoff              = sizeof(pSymFile->EHdr);
    1126     pSymFile->EHdr.e_flags              = 0;
    1127     pSymFile->EHdr.e_ehsize             = sizeof(pSymFile->EHdr);
    1128 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1129     pSymFile->EHdr.e_phentsize          = sizeof(pSymFile->aPhdrs[0]);
    1130     pSymFile->EHdr.e_phnum              = RT_ELEMENTS(pSymFile->aPhdrs);
    1131 #   else
    1132     pSymFile->EHdr.e_phentsize          = 0;
    1133     pSymFile->EHdr.e_phnum              = 0;
    1134 #   endif
    1135     pSymFile->EHdr.e_shentsize          = sizeof(pSymFile->aShdrs[0]);
    1136     pSymFile->EHdr.e_shnum              = RT_ELEMENTS(pSymFile->aShdrs);
    1137     pSymFile->EHdr.e_shstrndx           = 0; /* set later */
    1138 
    1139     uint32_t offStrTab = 0;
    1140 #define APPEND_STR(a_szStr) do { \
    1141         memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
    1142         offStrTab += sizeof(a_szStr); \
    1143         Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
    1144     } while (0)
    1145 #define APPEND_STR_FMT(a_szStr, ...) do { \
    1146         offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
    1147         offStrTab++; \
    1148         Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
    1149     } while (0)
    1150 
    1151     /*
    1152      * Section headers.
    1153      */
    1154     /* Section header #0: NULL */
    1155     unsigned i = 0;
    1156     APPEND_STR("");
    1157     RT_ZERO(pSymFile->aShdrs[i]);
    1158     i++;
    1159 
    1160     /* Section header: .eh_frame */
    1161     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1162     APPEND_STR(".eh_frame");
    1163     pSymFile->aShdrs[i].sh_type         = SHT_PROGBITS;
    1164     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC | SHF_EXECINSTR;
    1165 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    1166     pSymFile->aShdrs[i].sh_offset
    1167         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
    1168 #   else
    1169     pSymFile->aShdrs[i].sh_addr         = (uintptr_t)&pSymFile->abEhFrame[0];
    1170     pSymFile->aShdrs[i].sh_offset       = 0;
    1171 #   endif
    1172 
    1173     pSymFile->aShdrs[i].sh_size         = sizeof(pEhFrame->abEhFrame);
    1174     pSymFile->aShdrs[i].sh_link         = 0;
    1175     pSymFile->aShdrs[i].sh_info         = 0;
    1176     pSymFile->aShdrs[i].sh_addralign    = 1;
    1177     pSymFile->aShdrs[i].sh_entsize      = 0;
    1178     memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
    1179     i++;
    1180 
    1181     /* Section header: .shstrtab */
    1182     unsigned const iShStrTab = i;
    1183     pSymFile->EHdr.e_shstrndx           = iShStrTab;
    1184     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1185     APPEND_STR(".shstrtab");
    1186     pSymFile->aShdrs[i].sh_type         = SHT_STRTAB;
    1187     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1188 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    1189     pSymFile->aShdrs[i].sh_offset
    1190         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
    1191 #   else
    1192     pSymFile->aShdrs[i].sh_addr         = (uintptr_t)&pSymFile->szzStrTab[0];
    1193     pSymFile->aShdrs[i].sh_offset       = 0;
    1194 #   endif
    1195     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->szzStrTab);
    1196     pSymFile->aShdrs[i].sh_link         = 0;
    1197     pSymFile->aShdrs[i].sh_info         = 0;
    1198     pSymFile->aShdrs[i].sh_addralign    = 1;
    1199     pSymFile->aShdrs[i].sh_entsize      = 0;
    1200     i++;
    1201 
    1202     /* Section header: .symtab */
    1203     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1204     APPEND_STR(".symtab");
    1205     pSymFile->aShdrs[i].sh_type         = SHT_SYMTAB;
    1206     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1207     pSymFile->aShdrs[i].sh_offset
    1208         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
    1209     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aSymbols);
    1210     pSymFile->aShdrs[i].sh_link         = iShStrTab;
    1211     pSymFile->aShdrs[i].sh_info         = RT_ELEMENTS(pSymFile->aSymbols);
    1212     pSymFile->aShdrs[i].sh_addralign    = sizeof(pSymFile->aSymbols[0].st_value);
    1213     pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aSymbols[0]);
    1214     i++;
    1215 
    1216 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1217     /* Section header: .dynsym */
    1218     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1219     APPEND_STR(".dynsym");
    1220     pSymFile->aShdrs[i].sh_type         = SHT_DYNSYM;
    1221     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1222     pSymFile->aShdrs[i].sh_offset
    1223         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
    1224     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aDynSyms);
    1225     pSymFile->aShdrs[i].sh_link         = iShStrTab;
    1226     pSymFile->aShdrs[i].sh_info         = RT_ELEMENTS(pSymFile->aDynSyms);
    1227     pSymFile->aShdrs[i].sh_addralign    = sizeof(pSymFile->aDynSyms[0].st_value);
    1228     pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aDynSyms[0]);
    1229     i++;
    1230 #   endif
    1231 
    1232 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1233     /* Section header: .dynamic */
    1234     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1235     APPEND_STR(".dynamic");
    1236     pSymFile->aShdrs[i].sh_type         = SHT_DYNAMIC;
    1237     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1238     pSymFile->aShdrs[i].sh_offset
    1239         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
    1240     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aDyn);
    1241     pSymFile->aShdrs[i].sh_link         = iShStrTab;
    1242     pSymFile->aShdrs[i].sh_info         = 0;
    1243     pSymFile->aShdrs[i].sh_addralign    = 1;
    1244     pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aDyn[0]);
    1245     i++;
    1246 #   endif
    1247 
    1248     /* Section header: .text */
    1249     unsigned const iShText = i;
    1250     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1251     APPEND_STR(".text");
    1252     pSymFile->aShdrs[i].sh_type         = SHT_PROGBITS;
    1253     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC | SHF_EXECINSTR;
    1254 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    1255     pSymFile->aShdrs[i].sh_offset
    1256         = pSymFile->aShdrs[i].sh_addr   = sizeof(GDBJITSYMFILE);
    1257 #   else
    1258     pSymFile->aShdrs[i].sh_addr         = (uintptr_t)(pSymFile + 1);
    1259     pSymFile->aShdrs[i].sh_offset       = 0;
    1260 #   endif
    1261     pSymFile->aShdrs[i].sh_size         = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
    1262     pSymFile->aShdrs[i].sh_link         = 0;
    1263     pSymFile->aShdrs[i].sh_info         = 0;
    1264     pSymFile->aShdrs[i].sh_addralign    = 1;
    1265     pSymFile->aShdrs[i].sh_entsize      = 0;
    1266     i++;
    1267 
    1268     Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
    1269 
    1270 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1271     /*
    1272      * The program headers:
    1273      */
    1274     /* Everything in a single LOAD segment: */
    1275     i = 0;
    1276     pSymFile->aPhdrs[i].p_type          = PT_LOAD;
    1277     pSymFile->aPhdrs[i].p_flags         = PF_X | PF_R;
    1278     pSymFile->aPhdrs[i].p_offset
    1279         = pSymFile->aPhdrs[i].p_vaddr
    1280         = pSymFile->aPhdrs[i].p_paddr   = 0;
    1281     pSymFile->aPhdrs[i].p_filesz         /* Size of segment in file. */
    1282         = pSymFile->aPhdrs[i].p_memsz   = pExecMemAllocator->cbChunk - offSymFileInChunk;
    1283     pSymFile->aPhdrs[i].p_align         = HOST_PAGE_SIZE;
    1284     i++;
    1285     /* The .dynamic segment. */
    1286     pSymFile->aPhdrs[i].p_type          = PT_DYNAMIC;
    1287     pSymFile->aPhdrs[i].p_flags         = PF_R;
    1288     pSymFile->aPhdrs[i].p_offset
    1289         = pSymFile->aPhdrs[i].p_vaddr
    1290         = pSymFile->aPhdrs[i].p_paddr   = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
    1291     pSymFile->aPhdrs[i].p_filesz         /* Size of segment in file. */
    1292         = pSymFile->aPhdrs[i].p_memsz   = sizeof(pSymFile->aDyn);
    1293     pSymFile->aPhdrs[i].p_align         = sizeof(pSymFile->aDyn[0].d_tag);
    1294     i++;
    1295 
    1296     Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
    1297 
    1298     /*
    1299      * The dynamic section:
    1300      */
    1301     i = 0;
    1302     pSymFile->aDyn[i].d_tag             = DT_SONAME;
    1303     pSymFile->aDyn[i].d_un.d_val        = offStrTab;
    1304     APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
    1305     i++;
    1306     pSymFile->aDyn[i].d_tag             = DT_STRTAB;
    1307     pSymFile->aDyn[i].d_un.d_ptr        = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
    1308     i++;
    1309     pSymFile->aDyn[i].d_tag             = DT_STRSZ;
    1310     pSymFile->aDyn[i].d_un.d_val        = sizeof(pSymFile->szzStrTab);
    1311     i++;
    1312     pSymFile->aDyn[i].d_tag             = DT_SYMTAB;
    1313     pSymFile->aDyn[i].d_un.d_ptr        = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
    1314     i++;
    1315     pSymFile->aDyn[i].d_tag             = DT_SYMENT;
    1316     pSymFile->aDyn[i].d_un.d_val        = sizeof(pSymFile->aDynSyms[0]);
    1317     i++;
    1318     pSymFile->aDyn[i].d_tag             = DT_NULL;
    1319     i++;
    1320     Assert(i == RT_ELEMENTS(pSymFile->aDyn));
    1321 #   endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
    1322 
    1323     /*
    1324      * Symbol tables:
    1325      */
    1326     /** @todo gdb doesn't seem to really like this ...   */
    1327     i = 0;
    1328     pSymFile->aSymbols[i].st_name       = 0;
    1329     pSymFile->aSymbols[i].st_shndx      = SHN_UNDEF;
    1330     pSymFile->aSymbols[i].st_value      = 0;
    1331     pSymFile->aSymbols[i].st_size       = 0;
    1332     pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
    1333     pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
    1334 #   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    1335     pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
    1336 #   endif
    1337     i++;
    1338 
    1339     pSymFile->aSymbols[i].st_name       = 0;
    1340     pSymFile->aSymbols[i].st_shndx      = SHN_ABS;
    1341     pSymFile->aSymbols[i].st_value      = 0;
    1342     pSymFile->aSymbols[i].st_size       = 0;
    1343     pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
    1344     pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
    1345     i++;
    1346 
    1347     pSymFile->aSymbols[i].st_name       = offStrTab;
    1348     APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
    1349 #   if 0
    1350     pSymFile->aSymbols[i].st_shndx      = iShText;
    1351     pSymFile->aSymbols[i].st_value      = 0;
    1352 #   else
    1353     pSymFile->aSymbols[i].st_shndx      = SHN_ABS;
    1354     pSymFile->aSymbols[i].st_value      = (uintptr_t)(pSymFile + 1);
    1355 #   endif
    1356     pSymFile->aSymbols[i].st_size       = pSymFile->aShdrs[iShText].sh_size;
    1357     pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
    1358     pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
    1359 #   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    1360     pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
    1361     pSymFile->aDynSyms[1].st_value      = (uintptr_t)(pSymFile + 1);
    1362 #   endif
    1363     i++;
    1364 
    1365     Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
    1366     Assert(offStrTab < sizeof(pSymFile->szzStrTab));
    1367 
    1368     /*
    1369      * The GDB JIT entry and informing GDB.
    1370      */
    1371     pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
    1372 #   if 1
    1373     pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
    1374 #   else
    1375     pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
    1376 #   endif
    1377 
    1378     RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
    1379     RTCritSectEnter(&g_IemNativeGdbJitLock);
    1380     pEhFrame->GdbJitEntry.pNext      = NULL;
    1381     pEhFrame->GdbJitEntry.pPrev      = __jit_debug_descriptor.pTail;
    1382     if (__jit_debug_descriptor.pTail)
    1383         __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
    1384     else
    1385         __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
    1386     __jit_debug_descriptor.pTail     = &pEhFrame->GdbJitEntry;
    1387     __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
    1388 
    1389     /* Notify GDB: */
    1390     __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
    1391     __jit_debug_register_code();
    1392     __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
    1393     RTCritSectLeave(&g_IemNativeGdbJitLock);
    1394 
    1395 #  else  /* !IEMNATIVE_USE_GDB_JIT */
    1396     RT_NOREF(pVCpu);
    1397 #  endif /* !IEMNATIVE_USE_GDB_JIT */
    1398 
    1399     return VINF_SUCCESS;
    1400 }
    1401 
    1402 # endif /* !RT_OS_WINDOWS */
    1403 #endif /* IN_RING3 */
    1404 
    1405 
    1406 /**
    1407  * Adds another chunk to the executable memory allocator.
    1408  *
    1409  * This is used by the init code for the initial allocation and later by the
    1410  * regular allocator function when it's out of memory.
    1411  */
    1412 static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
    1413 {
    1414     /* Check that we've room for growth. */
    1415     uint32_t const idxChunk = pExecMemAllocator->cChunks;
    1416     AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
    1417 
    1418     /* Allocate a chunk. */
    1419 #ifdef RT_OS_DARWIN
    1420     void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
    1421 #else
    1422     void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
    1423 #endif
    1424     AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
    1425 
    1426     /*
    1427      * Add the chunk.
    1428      *
    1429      * This must be done before the unwind init so windows can allocate
    1430      * memory from the chunk when using the alternative sub-allocator.
    1431      */
    1432     pExecMemAllocator->aChunks[idxChunk].pvChunk      = pvChunk;
    1433 #ifdef IN_RING3
    1434     pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
    1435 #endif
    1436     pExecMemAllocator->aChunks[idxChunk].cFreeUnits   = pExecMemAllocator->cUnitsPerChunk;
    1437     pExecMemAllocator->aChunks[idxChunk].idxFreeHint  = 0;
    1438     memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
    1439            0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
    1440 
    1441     pExecMemAllocator->cChunks      = idxChunk + 1;
    1442     pExecMemAllocator->idxChunkHint = idxChunk;
    1443 
    1444     pExecMemAllocator->cbTotal     += pExecMemAllocator->cbChunk;
    1445     pExecMemAllocator->cbFree      += pExecMemAllocator->cbChunk;
    1446 
    1447 #ifdef IN_RING3
    1448     /*
    1449      * Initialize the unwind information (this cannot really fail atm).
    1450      * (This sets pvUnwindInfo.)
    1451      */
    1452     int rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
    1453     if (RT_SUCCESS(rc))
    1454     { /* likely */ }
    1455     else
    1456     {
    1457         /* Just in case the impossible happens, undo the above: */
    1458         pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
    1459         pExecMemAllocator->cbFree  -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    1460         pExecMemAllocator->cChunks  = idxChunk;
    1461         memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
    1462                0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
    1463         pExecMemAllocator->aChunks[idxChunk].pvChunk    = NULL;
    1464         pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
    1465 
    1466         RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
    1467         return rc;
    1468     }
    1469 #endif
    1470     return VINF_SUCCESS;
    1471 }
    1472 
    1473 
    1474 /**
    1475  * Initializes the executable memory allocator for native recompilation on the
    1476  * calling EMT.
    1477  *
    1478  * @returns VBox status code.
    1479  * @param   pVCpu       The cross context virtual CPU structure of the calling
    1480  *                      thread.
    1481  * @param   cbMax       The max size of the allocator.
    1482  * @param   cbInitial   The initial allocator size.
    1483  * @param   cbChunk     The chunk size, 0 or UINT32_MAX for default (@a cbMax
    1484  *                      dependent).
    1485  */
    1486 int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
    1487 {
    1488     /*
    1489      * Validate input.
    1490      */
    1491     AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
    1492     AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
    1493     AssertLogRelMsgReturn(   cbChunk == UINT32_MAX
    1494                           || cbChunk == 0
    1495                           || (   RT_IS_POWER_OF_TWO(cbChunk)
    1496                               && cbChunk >= _1M
    1497                               && cbChunk <= _256M
    1498                               && cbChunk <= cbMax),
    1499                           ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
    1500                           VERR_OUT_OF_RANGE);
    1501 
    1502     /*
    1503      * Adjust/figure out the chunk size.
    1504      */
    1505     if (cbChunk == 0 || cbChunk == UINT32_MAX)
    1506     {
    1507         if (cbMax >= _256M)
    1508             cbChunk = _64M;
    1509         else
    1510         {
    1511             if (cbMax < _16M)
    1512                 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
    1513             else
    1514                 cbChunk = (uint32_t)cbMax / 4;
    1515             if (!RT_IS_POWER_OF_TWO(cbChunk))
    1516                 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
    1517         }
    1518     }
    1519 
    1520     if (cbChunk > cbMax)
    1521         cbMax = cbChunk;
    1522     else
    1523         cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
    1524     uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
    1525     AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
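    /* Sizing example (illustrative): cbMax = 40 MiB selects cbChunk = cbMax / 4
       = 10 MiB, which the ASMBitLastSetU32/RT_BIT_32 step rounds up to the next
       power of two, 16 MiB; cbMax is then rounded up to a whole number of
       chunks, 48 MiB, giving cMaxChunks = 3. */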
    1526 
    1527     /*
    1528      * Allocate and initialize the allocator instance.
    1529      */
    1530     size_t const offBitmaps = RT_ALIGN_Z(RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]), RT_CACHELINE_SIZE);
    1531     size_t const cbBitmaps  = (size_t)(cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3)) * cMaxChunks;
    1532     size_t       cbNeeded   = offBitmaps + cbBitmaps;
    1533     AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
    1534     Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
    1535 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    1536     size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
    1537     cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
    1538 #endif
    1539     PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
    1540     AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
    1541                           VERR_NO_MEMORY);
    1542     pExecMemAllocator->uMagic       = IEMEXECMEMALLOCATOR_MAGIC;
    1543     pExecMemAllocator->cbChunk      = cbChunk;
    1544     pExecMemAllocator->cMaxChunks   = cMaxChunks;
    1545     pExecMemAllocator->cChunks      = 0;
    1546     pExecMemAllocator->idxChunkHint = 0;
    1547     pExecMemAllocator->cAllocations = 0;
    1548     pExecMemAllocator->cbTotal      = 0;
    1549     pExecMemAllocator->cbFree       = 0;
    1550     pExecMemAllocator->cbAllocated  = 0;
    1551     pExecMemAllocator->pbmAlloc                 = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
    1552     pExecMemAllocator->cUnitsPerChunk           = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    1553     pExecMemAllocator->cBitmapElementsPerChunk  = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
    1554     memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmaps); /* Mark everything as allocated. Clear when chunks are added. */
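             /* Illustrative numbers, assuming a unit shift of 8 (256-byte units): a
                64M chunk holds 64M / 256 = 262144 units, so its bitmap needs 262144
                bits = 32KB, i.e. 4096 uint64_t elements (cbChunk >> (8 + 6)). */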
    1555 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    1556     pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
    1557 #endif
    1558     for (uint32_t i = 0; i < cMaxChunks; i++)
    1559     {
    1560         pExecMemAllocator->aChunks[i].cFreeUnits   = 0;
    1561         pExecMemAllocator->aChunks[i].idxFreeHint  = 0;
    1562         pExecMemAllocator->aChunks[i].pvChunk      = NULL;
    1563 #ifdef IN_RING0
    1564         pExecMemAllocator->aChunks[i].hMemObj      = NIL_RTR0MEMOBJ;
    1565 #else
    1566         pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
    1567 #endif
    1568     }
    1569     pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
    1570 
    1571     /*
    1572      * Do the initial allocations: grow until at least cbInitial bytes are available.
    1573      */
    1574     while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
    1575     {
    1576         int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
    1577         AssertLogRelRCReturn(rc, rc);
    1578     }
    1579 
    1580     pExecMemAllocator->idxChunkHint = 0;
    1581 
    1582     /*
    1583      * Register statistics.
    1584      */
    1585     PUVM const pUVM = pVCpu->pUVCpu->pUVM;
    1586     STAMR3RegisterFU(pUVM, &pExecMemAllocator->cAllocations,    STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    1587                      "Current number of allocations",           "/IEM/CPU%u/re/ExecMem/cAllocations", pVCpu->idCpu);
    1588     STAMR3RegisterFU(pUVM, &pExecMemAllocator->cChunks,         STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    1589                      "Currently allocated chunks",              "/IEM/CPU%u/re/ExecMem/cChunks", pVCpu->idCpu);
    1590     STAMR3RegisterFU(pUVM, &pExecMemAllocator->cMaxChunks,      STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    1591                      "Maximum number of chunks",                "/IEM/CPU%u/re/ExecMem/cMaxChunks", pVCpu->idCpu);
    1592     STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbChunk,         STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
    1593                      "Allocation chunk size",                   "/IEM/CPU%u/re/ExecMem/cbChunk", pVCpu->idCpu);
    1594     STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbAllocated,     STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
    1595                      "Number of bytes currently allocated",     "/IEM/CPU%u/re/ExecMem/cbAllocated", pVCpu->idCpu);
    1596     STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbFree,          STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
    1597                      "Number of bytes currently free",          "/IEM/CPU%u/re/ExecMem/cbFree", pVCpu->idCpu);
    1598     STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbTotal,         STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
    1599                      "Total number of bytes",                   "/IEM/CPU%u/re/ExecMem/cbTotal", pVCpu->idCpu);
    1600 #ifdef VBOX_WITH_STATISTICS
    1601     STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatAlloc,       STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
    1602                      "Profiling the allocator",                 "/IEM/CPU%u/re/ExecMem/ProfAlloc", pVCpu->idCpu);
    1603 #endif
    1604 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
    1605     STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneProf,   STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
    1606                      "Pruning executable memory (alt)",         "/IEM/CPU%u/re/ExecMem/Pruning", pVCpu->idCpu);
    1607     STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneRecovered, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES_PER_CALL,
    1608                      "Bytes recovered while pruning",           "/IEM/CPU%u/re/ExecMem/PruningRecovered", pVCpu->idCpu);
    1609 #endif
    1610 
    1611     return VINF_SUCCESS;
    1612 }
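
For orientation, here is a minimal sketch of a call site for the initializer above. The sizes are made-up illustrations and the error handling is reduced to a single status check; only the function signature itself comes from the source:

    /* Hypothetical call on an EMT's init path (sizes are examples only):
       64M ceiling, 16M allocated up front, 0 = let the code pick a chunk size. */
    int rc = iemExecMemAllocatorInit(pVCpu, _64M /*cbMax*/, _16M /*cbInitial*/, 0 /*cbChunk*/);
    AssertLogRelRCReturn(rc, rc);

With these values the defaulting logic above picks cbChunk = 64M / 4 = 16M, and the initial-allocation loop grows exactly one 16M chunk to cover cbInitial.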
  • trunk/src/VBox/VMM/include/IEMInternal.h

    r104114 r104115  
    6139 6139 DECLHIDDEN(PIEMTB)  iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT;
    6140 6140 DECLHIDDEN(void)    iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT;
    6141      int                 iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk);
    6142      void                iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb);
         6141 int                 iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk) RT_NOEXCEPT;
         6142 DECLHIDDEN(void *)  iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb) RT_NOEXCEPT;
         6143 DECLHIDDEN(void)    iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) RT_NOEXCEPT;
         6144 void                iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) RT_NOEXCEPT;
    6143 6145 DECLASM(DECL_NO_RETURN(void)) iemNativeTbLongJmp(void *pvFramePointer, int rc) RT_NOEXCEPT;
    6144 6146
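
The three prototypes added here outline the lifecycle a consumer presumably goes through: allocate writable memory for a translation block, emit code into it, mark it ready for use (plausibly covering instruction-cache flushing and write-protection switching on hosts that separate writable from executable mappings), and eventually free it. The sketch below is inferred from the signatures alone; pvNativeCode, cbReq and pTb are placeholder names and the exact call order is an assumption, not something stated in the changeset:

    /* Hypothetical consumer of the new API trio (inferred from the prototypes). */
    void *pvCode = iemExecMemAllocatorAlloc(pVCpu, cbReq, pTb);   /* get writable memory       */
    if (pvCode)
    {
        memcpy(pvCode, pvNativeCode, cbReq);                      /* emit the recompiled code  */
        iemExecMemAllocatorReadyForUse(pVCpu, pvCode, cbReq);     /* make it executable        */
        /* ... the TB runs from pvCode ...; when it is retired:  */
        iemExecMemAllocatorFree(pVCpu, pvCode, cbReq);            /* return it to the allocator */
    }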