Changeset 104115 in vbox for trunk/src/VBox
- Timestamp: Mar 29, 2024, 2:11:56 AM
- svn:sync-xref-src-repo-rev: 162522
- Location: trunk/src/VBox/VMM
- Files: 3 edited, 1 copied
trunk/src/VBox/VMM/Makefile.kmk
r103808 → r104115

  ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
   VBoxVMM_SOURCES += \
+       VMMAll/IEMAllN8veExecMem.cpp \
        VMMAll/IEMAllN8veRecompiler.cpp \
        VMMAll/IEMAllN8veRecompFuncs1.cpp \
trunk/src/VBox/VMM/VMMAll/IEMAllN8veExecMem.cpp
r104114 → r104115

The copied file drops the includes, config checks and forward declarations that only the recompiler proper needs, and turns the executable-memory allocator entry points into hidden, non-throwing externals:

@@ header includes @@
  #define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
  #define IEM_WITH_OPAQUE_DECODER_STATE
- #define VMCPU_INCL_CPUM_GST_CTX
  #define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
  #include <VBox/vmm/iem.h>
  #include <VBox/vmm/cpum.h>
- #include <VBox/vmm/dbgf.h>
  #include "IEMInternal.h"
  #include <VBox/vmm/vmcc.h>
  #include <VBox/log.h>
  #include <VBox/err.h>
- #include <VBox/dis.h>
  #include <VBox/param.h>
  #include <iprt/assert.h>

@@ recompiler-only includes, config checks and forward declarations @@
  # endif
  #endif
- #ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
- # include "/opt/local/include/capstone/capstone.h"
- #endif
-
- #include "IEMInline.h"
- #include "IEMThreadedFunctions.h"
  #include "IEMN8veRecompiler.h"
- #include "IEMN8veRecompilerEmit.h"
- #include "IEMN8veRecompilerTlbLookup.h"
- #include "IEMNativeFunctions.h"
-
- /*
-  * Narrow down configs here to avoid wasting time on unused configs here.
-  * Note! Same checks in IEMAllThrdRecompiler.cpp.
-  */
- #ifndef IEM_WITH_CODE_TLB
- # error The code TLB must be enabled for the recompiler.
- #endif
- #ifndef IEM_WITH_DATA_TLB
- # error The data TLB must be enabled for the recompiler.
- #endif
- #ifndef IEM_WITH_SETJMP
- # error The setjmp approach must be enabled for the recompiler.
- #endif
- /** @todo eliminate this clang build hack. */
- #if RT_CLANG_PREREQ(4, 0)
- # pragma GCC diagnostic ignored "-Wunused-function"
- #endif
-
- /* Internal Functions */
- #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
- static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
- #endif
- DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
- DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
-                                                             IEMNATIVEGSTREG enmGstReg, uint32_t off);
- DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);

@@ iemExecMemAllocatorAlloc @@
   * @param   pTb     The translation block that will be using the allocation.
   */
- static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb)
+ DECLHIDDEN(void *) iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb) RT_NOEXCEPT
  {
      PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;

@@ iemExecMemAllocatorReadyForUse @@
  /** This is a hook that we may need later for changing memory protection back
   *  to readonly+exec */
- static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
+ DECLHIDDEN(void) iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) RT_NOEXCEPT
  {
  #ifdef RT_OS_DARWIN

@@ iemExecMemAllocatorFree @@
  /**
   * Frees executable memory.
   */
- void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
+ DECLHIDDEN(void) iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) RT_NOEXCEPT
  {
      PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;

@@ iemExecMemAllocatorInit @@
   * dependent).
   */
- int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
+ int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk) RT_NOEXCEPT
  {
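Taken together, these four entry points are the external surface of the executable-memory allocator that this file now owns. A minimal usage sketch, assuming the usual IEM-internal headers; the copy step, error code and control flow are illustrative, only the entry-point signatures come from the diff above:

    /* Illustration only: driving the four allocator entry points from a
       hypothetical caller (iemExampleEmitTb is a made-up name). */
    static int iemExampleEmitTb(PVMCPUCC pVCpu, PIEMTB pTb, const uint8_t *pbCode, uint32_t cbCode)
    {
        /* One-time setup happens elsewhere via
           iemExecMemAllocatorInit(pVCpu, cbMax, cbInitial, cbChunk). */

        /* Grab executable memory for the translation block. */
        void *pvExec = iemExecMemAllocatorAlloc(pVCpu, cbCode, pTb);
        if (!pvExec)
            return VERR_NO_MEMORY;

        memcpy(pvExec, pbCode, cbCode);             /* emit/copy the native code */

        /* Hook for switching protection back to readonly+exec (darwin bits
           live behind RT_OS_DARWIN in the source). */
        iemExecMemAllocatorReadyForUse(pVCpu, pvExec, cbCode);

        /* ... later, when the TB is retired: */
        iemExecMemAllocatorFree(pVCpu, pvExec, cbCode);
        return VINF_SUCCESS;
    }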
The rest of the diff removes the entire native-recompilation half of the copied file (old line 1614 onward); that code stays behind in IEMAllN8veRecompiler.cpp, leaving IEMAllN8veExecMem.cpp with only the executable-memory allocator. The removed "Native Recompilation" section starts with the TB-code status and exception helpers, each a thin wrapper called by the generated code:

- iemNativeHlpExecStatusCodeFiddling — used when a call returns a non-zero status or rcPassUp; adds idxInstr to pVCpu->iem.s.cInstructions and runs the status through iemExecStatusCodeFiddling, treating VINF_IEM_REEXEC_BREAK as VINF_SUCCESS.
- iemNativeHlpExecRaiseDe / RaiseUd / RaiseNm / RaiseGp0 / RaiseMf / RaiseXf — raise #DE, #UD, #NM, #GP(0), #MF and #XF via the corresponding iemRaise*Jmp call; the trailing "return VINF_IEM_RAISED_XCPT; /* not reached */" only exists to keep non-MSC compilers quiet.
- iemNativeHlpExecRaiseSseRelated — the SSE gate (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT): #UD if CR0.EM is set or CR4.OSFXSR is clear, otherwise #NM.
- iemNativeHlpExecRaiseAvxRelated — the AVX gate (see IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT): #UD if XCR0 lacks YMM|SSE or CR4.OSXSAVE is clear, otherwise #NM.
- iemNativeHlpExecRaiseSseAvxFpRelated — the SIMD FP gate (see IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT): #XF if CR4.OSXMMEEXCPT is set, otherwise #UD.
- iemNativeHlpObsoleteTb — used when opcode changes are detected; logs, marks the current TB obsolete via iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/), because the executable memory cannot be released until the return path back to iemTbExec has left the TB's own native code, and returns VINF_IEM_REEXEC_BREAK.
- iemNativeHlpNeedCsLimChecking — used when execution must switch to a TB with CS.LIM checking; logs, bumps StatCheckNeedCsLimChecking and returns VINF_IEM_REEXEC_BREAK.
- iemNativeHlpCheckBranchMiss — used when a PC check was missed after a branch; logs, bumps StatCheckBranchMisses and returns VINF_IEM_REEXEC_BREAK.
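For reference, the three gate decisions above restated as standalone predicates; the decision logic comes from the removed helpers, the predicate wrappers themselves (iemExample*) are made up for illustration and assume the usual VBox/iprt headers:

    /* Illustration only: the raise decisions of the removed SSE/AVX gate helpers,
       written as predicates over the guest control-register state. */
    static bool iemExampleSseNeedsUd(PCCPUMCTX pCtx)    /* otherwise the gate raises #NM */
    {
        return (pCtx->cr0 & X86_CR0_EM)
            || !(pCtx->cr4 & X86_CR4_OSFXSR);
    }

    static bool iemExampleAvxNeedsUd(PCCPUMCTX pCtx)    /* otherwise the gate raises #NM */
    {
        return (pCtx->aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
            || !(pCtx->cr4 & X86_CR4_OSXSAVE);
    }

    static bool iemExampleSimdFpRaisesXf(PCCPUMCTX pCtx) /* otherwise the gate raises #UD */
    {
        return RT_BOOL(pCtx->cr4 & X86_CR4_OSXMMEEXCPT);
    }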
Also removed is the whole "Helpers: Segmented memory fetches and stores" section. Every helper in it has the same shape: when the corresponding native TLB lookup is compiled in, the generated code does the fast path inline and the helper only covers the slow path, so it calls the out-of-line "Safe" worker; otherwise it falls back to the ordinary longjmp-style worker. The 8-bit fetch is representative:

    /**
     * Used by TB code to load unsigned 8-bit data w/ segmentation.
     */
    IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    {
    #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
        return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    #else
        return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    #endif
    }

The removed helpers, grouped by what they dispatch to:

- Fetches (IEMNATIVE_WITH_TLB_LOOKUP_FETCH → iemMemFetchDataU*SafeJmp, else iemMemFetchDataU*Jmp), all returning uint64_t: iemNativeHlpMemFetchDataU8, U8_Sx_U16, U8_Sx_U32, U8_Sx_U64, U16, U16_Sx_U32, U16_Sx_U64, U32, U32_Sx_U64 and U64. The _Sx_ variants sign-extend through a cast chain such as (uint64_t)(uint32_t)(int32_t)(int16_t) before widening to 64 bits.
- SIMD fetches (only with IEMNATIVE_WITH_SIMD_REG_ALLOCATOR): iemNativeHlpMemFetchDataU128, U128AlignedSse, U128NoAc, U256NoAc and U256AlignedAvx, filling a caller-provided PRTUINT128U/PRTUINT256U.
- Stores (IEMNATIVE_WITH_TLB_LOOKUP_STORE → iemMemStoreDataU*SafeJmp, else iemMemStoreDataU*Jmp): iemNativeHlpMemStoreDataU8, U16, U32 and U64, plus the SIMD variants U128AlignedSse, U128NoAc, U256NoAc and U256AlignedAvx under IEMNATIVE_WITH_SIMD_REG_ALLOCATOR.
- Generic-stack stores (IEMNATIVE_WITH_TLB_LOOKUP_PUSH → iemMemStoreStackU*SafeJmp, else iemMemStoreStackU*Jmp): iemNativeHlpStackStoreU16, U32, U32SReg and U64. The U32SReg variant exists because Intel CPUs don't write a whole dword when pushing a segment selector.
- Generic-stack fetches (IEMNATIVE_WITH_TLB_LOOKUP_POP → iemMemFetchStackU*SafeJmp, else iemMemFetchStackU*Jmp): iemNativeHlpStackFetchU16, U32 and U64.
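The cast chains used by the _Sx_ helpers are easy to misread, so here is a tiny self-contained example (not from the changeset) of the 16-bit cases: sign-extend to the target width first, then zero-extend that result into the 64-bit return value.

    #include <stdint.h>
    #include <stdio.h>

    /* Demonstrates the cast chains used by the sign-extending fetch helpers. */
    int main(void)
    {
        uint16_t const uRaw  = 0x8000; /* -32768 when read as int16_t */
        uint64_t const uSx32 = (uint64_t)(uint32_t)(int32_t)(int16_t)uRaw; /* _Sx_U32 */
        uint64_t const uSx64 = (uint64_t)(int64_t)(int16_t)uRaw;           /* _Sx_U64 */
        printf("%#llx %#llx\n", (unsigned long long)uSx32, (unsigned long long)uSx64);
        /* prints: 0xffff8000 0xffffffffffff8000 */
        return 0;
    }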
The removed "Helpers: Flat memory fetches and stores" section mirrors the segmented one for flat addresses. With the native TLB lookup compiled in, each helper reuses the segmented "Safe" worker and simply passes UINT8_MAX as iSegReg; otherwise it calls the dedicated iemMemFlat*Jmp worker. As the doc comments note, the narrow fetches zero-extend their result to 64 bits to simplify the generated assembly.

- Flat fetches: iemNativeHlpMemFlatFetchDataU8, U8_Sx_U16, U8_Sx_U32, U8_Sx_U64, U16, U16_Sx_U32, U16_Sx_U64, U32, U32_Sx_U64 and U64, plus the SIMD variants U128, U128AlignedSse, U128NoAc, U256NoAc and U256AlignedAvx under IEMNATIVE_WITH_SIMD_REG_ALLOCATOR.
- Flat stores: iemNativeHlpMemFlatStoreDataU8, U16, U32 and U64, plus the SIMD variants U128AlignedSse, U128NoAc, U256NoAc and U256AlignedAvx.
- Flat-stack stores (IEMNATIVE_WITH_TLB_LOOKUP_PUSH): iemNativeHlpStackFlatStoreU16, U32, U32SReg (segment selectors again get their own worker) and U64.
- Flat-stack fetches (IEMNATIVE_WITH_TLB_LOOKUP_POP): iemNativeHlpStackFlatFetchU16, U32 and U64.
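The reason both stack sections carry a separate U32SReg worker is the doc-comment remark that Intel CPUs do not write a whole dword when pushing a segment register. A small sketch of what that means for the stack slot; the helper name here is made up for illustration:

    /* Illustration only: pushing a segment selector with a 32-bit operand size
       writes just the low 16 bits and leaves the upper half of the slot alone. */
    static void exampleWriteSRegToStackSlot(uint8_t *pbSlot /* 4-byte stack slot */, uint16_t uSel)
    {
        pbSlot[0] = (uint8_t)(uSel & 0xff);
        pbSlot[1] = (uint8_t)(uSel >> 8);
        /* pbSlot[2] and pbSlot[3] are deliberately left untouched. */
    }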
Two further removed sections cover the memory-mapping helpers. Each returns a typed host pointer into guest memory and records how to undo the mapping in the caller-provided *pbUnmapInfo byte; the access mode is encoded in the name (Atomic, Rw, Wo, Ro). With IEMNATIVE_WITH_TLB_LOOKUP_MAPPED defined they call the iemMemMapData*SafeJmp workers (the flat variants passing UINT8_MAX as iSegReg), otherwise the plain iemMemMapData*Jmp / iemMemFlatMapData*Jmp workers.

- "Helpers: Segmented memory mapping": iemNativeHlpMemMapDataU8/U16/U32/U64/U128 in Atomic, Rw, Wo and Ro flavours, plus the write-only iemNativeHlpMemMapDataR80Wo (80-bit float) and iemNativeHlpMemMapDataD80Wo (80-bit BCD). All take (pVCpu, pbUnmapInfo, GCPtrMem, iSegReg).
- "Helpers: Flat memory mapping": the same family for flat addresses, iemNativeHlpMemFlatMapDataU8/U16/U32/U64/U128 in Atomic, Rw, Wo and Ro flavours plus iemNativeHlpMemFlatMapDataR80Wo and iemNativeHlpMemFlatMapDataD80Wo, each taking (pVCpu, pbUnmapInfo, GCPtrMem).
GCPtrMem);3229 #else3230 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);3231 #endif3232 }3233 3234 3235 /*********************************************************************************************************************************3236 * Helpers: Commit, rollback & unmap *3237 *********************************************************************************************************************************/3238 3239 /**3240 * Used by TB code to commit and unmap a read-write memory mapping.3241 */3242 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))3243 {3244 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);3245 }3246 3247 3248 /**3249 * Used by TB code to commit and unmap a read-write memory mapping.3250 */3251 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))3252 {3253 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);3254 }3255 3256 3257 /**3258 * Used by TB code to commit and unmap a write-only memory mapping.3259 */3260 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))3261 {3262 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);3263 }3264 3265 3266 /**3267 * Used by TB code to commit and unmap a read-only memory mapping.3268 */3269 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))3270 {3271 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);3272 }3273 3274 3275 /**3276 * Reinitializes the native recompiler state.3277 *3278 * Called before starting a new recompile job.3279 */3280 static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)3281 {3282 pReNative->cLabels = 0;3283 pReNative->bmLabelTypes = 0;3284 pReNative->cFixups = 0;3285 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO3286 pReNative->pDbgInfo->cEntries = 0;3287 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;3288 #endif3289 pReNative->pTbOrg = pTb;3290 pReNative->cCondDepth = 0;3291 pReNative->uCondSeqNo = 0;3292 pReNative->uCheckIrqSeqNo = 0;3293 pReNative->uTlbSeqNo = 0;3294 3295 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING3296 pReNative->Core.offPc = 0;3297 pReNative->Core.cInstrPcUpdateSkipped = 0;3298 #endif3299 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR3300 pReNative->fSimdRaiseXcptChecksEmitted = 0;3301 #endif3302 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK3303 #if IEMNATIVE_HST_GREG_COUNT < 323304 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)3305 #endif3306 ;3307 pReNative->Core.bmHstRegsWithGstShadow = 0;3308 pReNative->Core.bmGstRegShadows = 0;3309 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK3310 pReNative->Core.bmGstRegShadowDirty = 0;3311 #endif3312 pReNative->Core.bmVars = 0;3313 pReNative->Core.bmStack = 0;3314 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. 
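
/*
 * A minimal usage sketch (hypothetical wrapper, purely illustrative): the
 * calling pattern the recompiled TB code effectively follows with the map and
 * commit-and-unmap helpers above - map the flat guest address, perform the
 * access, then commit and unmap using the info byte filled in by the map
 * helper.  The 32-bit write-only case is chosen just for the example.
 */
static void iemNativeExampleFlatStoreU32(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t uValue)
{
    uint8_t   bUnmapInfo = 0;
    uint32_t *pu32Dst = iemNativeHlpMemFlatMapDataU32Wo(pVCpu, &bUnmapInfo, GCPtrMem);
    *pu32Dst = uValue;                                  /* the guest memory access itself */
    iemNativeHlpMemCommitAndUnmapWo(pVCpu, bUnmapInfo); /* commit the write and release the mapping */
}
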
*/3315 pReNative->Core.u64ArgVars = UINT64_MAX;3316 3317 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 17);3318 pReNative->aidxUniqueLabels[0] = UINT32_MAX;3319 pReNative->aidxUniqueLabels[1] = UINT32_MAX;3320 pReNative->aidxUniqueLabels[2] = UINT32_MAX;3321 pReNative->aidxUniqueLabels[3] = UINT32_MAX;3322 pReNative->aidxUniqueLabels[4] = UINT32_MAX;3323 pReNative->aidxUniqueLabels[5] = UINT32_MAX;3324 pReNative->aidxUniqueLabels[6] = UINT32_MAX;3325 pReNative->aidxUniqueLabels[7] = UINT32_MAX;3326 pReNative->aidxUniqueLabels[8] = UINT32_MAX;3327 pReNative->aidxUniqueLabels[9] = UINT32_MAX;3328 pReNative->aidxUniqueLabels[10] = UINT32_MAX;3329 pReNative->aidxUniqueLabels[11] = UINT32_MAX;3330 pReNative->aidxUniqueLabels[12] = UINT32_MAX;3331 pReNative->aidxUniqueLabels[13] = UINT32_MAX;3332 pReNative->aidxUniqueLabels[14] = UINT32_MAX;3333 pReNative->aidxUniqueLabels[15] = UINT32_MAX;3334 pReNative->aidxUniqueLabels[16] = UINT32_MAX;3335 3336 /* Full host register reinit: */3337 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)3338 {3339 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;3340 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;3341 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;3342 }3343 3344 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK3345 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)3346 #ifdef IEMNATIVE_REG_FIXED_PCPUMCTX3347 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)3348 #endif3349 #ifdef IEMNATIVE_REG_FIXED_PCPUMCTX3350 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)3351 #endif3352 #ifdef IEMNATIVE_REG_FIXED_TMP13353 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)3354 #endif3355 #ifdef IEMNATIVE_REG_FIXED_PC_DBG3356 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)3357 #endif3358 );3359 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)3360 {3361 fRegs &= ~RT_BIT_32(idxReg);3362 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_FixedReserved;3363 }3364 3365 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;3366 #ifdef IEMNATIVE_REG_FIXED_PCPUMCTX3367 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;3368 #endif3369 #ifdef IEMNATIVE_REG_FIXED_TMP03370 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;3371 #endif3372 #ifdef IEMNATIVE_REG_FIXED_TMP13373 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;3374 #endif3375 #ifdef IEMNATIVE_REG_FIXED_PC_DBG3376 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;3377 #endif3378 3379 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR3380 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK3381 # if IEMNATIVE_HST_SIMD_REG_COUNT < 323382 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)3383 # endif3384 ;3385 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;3386 pReNative->Core.bmGstSimdRegShadows = 0;3387 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;3388 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;3389 3390 /* Full host register reinit: */3391 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)3392 {3393 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;3394 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;3395 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;3396 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;3397 }3398 3399 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;3400 for (uint32_t idxReg = 
ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)3401 {3402 fRegs &= ~RT_BIT_32(idxReg);3403 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;3404 }3405 3406 #ifdef IEMNATIVE_SIMD_REG_FIXED_TMP03407 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;3408 #endif3409 3410 #endif3411 3412 return pReNative;3413 }3414 3415 3416 /**3417 * Allocates and initializes the native recompiler state.3418 *3419 * This is called the first time an EMT wants to recompile something.3420 *3421 * @returns Pointer to the new recompiler state.3422 * @param pVCpu The cross context virtual CPU structure of the calling3423 * thread.3424 * @param pTb The TB that's about to be recompiled.3425 * @thread EMT(pVCpu)3426 */3427 static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)3428 {3429 VMCPU_ASSERT_EMT(pVCpu);3430 3431 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));3432 AssertReturn(pReNative, NULL);3433 3434 /*3435 * Try allocate all the buffers and stuff we need.3436 */3437 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);3438 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);3439 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);3440 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO3441 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));3442 #endif3443 if (RT_LIKELY( pReNative->pInstrBuf3444 && pReNative->paLabels3445 && pReNative->paFixups)3446 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO3447 && pReNative->pDbgInfo3448 #endif3449 )3450 {3451 /*3452 * Set the buffer & array sizes on success.3453 */3454 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);3455 pReNative->cLabelsAlloc = _8K;3456 pReNative->cFixupsAlloc = _16K;3457 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO3458 pReNative->cDbgInfoAlloc = _16K;3459 #endif3460 3461 /* Other constant stuff: */3462 pReNative->pVCpu = pVCpu;3463 3464 /*3465 * Done, just need to save it and reinit it.3466 */3467 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;3468 return iemNativeReInit(pReNative, pTb);3469 }3470 3471 /*3472 * Failed. Cleanup and return.3473 */3474 AssertFailed();3475 RTMemFree(pReNative->pInstrBuf);3476 RTMemFree(pReNative->paLabels);3477 RTMemFree(pReNative->paFixups);3478 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO3479 RTMemFree(pReNative->pDbgInfo);3480 #endif3481 RTMemFree(pReNative);3482 return NULL;3483 }3484 3485 3486 /**3487 * Creates a label3488 *3489 * If the label does not yet have a defined position,3490 * call iemNativeLabelDefine() later to set it.3491 *3492 * @returns Label ID. Throws VBox status code on failure, so no need to check3493 * the return value.3494 * @param pReNative The native recompile state.3495 * @param enmType The label type.3496 * @param offWhere The instruction offset of the label. UINT32_MAX if the3497 * label is not yet defined (default).3498 * @param uData Data associated with the lable. Only applicable to3499 * certain type of labels. 
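
/*
 * Illustrative only (hypothetical wrapper): the intended call flow around the
 * two routines above - the per-EMT recompiler state is allocated lazily by
 * iemNativeInit() on the first recompilation and merely reset by
 * iemNativeReInit() for every subsequent TB.
 */
static PIEMRECOMPILERSTATE iemNativeExampleGetState(PVMCPUCC pVCpu, PCIEMTB pTb)
{
    PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    if (RT_LIKELY(pReNative))
        return iemNativeReInit(pReNative, pTb); /* cheap per-TB reset */
    return iemNativeInit(pVCpu, pTb);           /* first use on this EMT: allocate buffers + init */
}
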
Default is zero.3500 */3501 DECL_HIDDEN_THROW(uint32_t)3502 iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,3503 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)3504 {3505 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);3506 3507 /*3508 * Locate existing label definition.3509 *3510 * This is only allowed for forward declarations where offWhere=UINT32_MAX3511 * and uData is zero.3512 */3513 PIEMNATIVELABEL paLabels = pReNative->paLabels;3514 uint32_t const cLabels = pReNative->cLabels;3515 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)3516 #ifndef VBOX_STRICT3517 && enmType < kIemNativeLabelType_FirstWithMultipleInstances3518 && offWhere == UINT32_MAX3519 && uData == 03520 #endif3521 )3522 {3523 #ifndef VBOX_STRICT3524 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,3525 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));3526 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];3527 if (idxLabel < pReNative->cLabels)3528 return idxLabel;3529 #else3530 for (uint32_t i = 0; i < cLabels; i++)3531 if ( paLabels[i].enmType == enmType3532 && paLabels[i].uData == uData)3533 {3534 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));3535 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));3536 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));3537 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,3538 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));3539 return i;3540 }3541 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances3542 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));3543 #endif3544 }3545 3546 /*3547 * Make sure we've got room for another label.3548 */3549 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))3550 { /* likely */ }3551 else3552 {3553 uint32_t cNew = pReNative->cLabelsAlloc;3554 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));3555 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));3556 cNew *= 2;3557 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */3558 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));3559 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));3560 pReNative->paLabels = paLabels;3561 pReNative->cLabelsAlloc = cNew;3562 }3563 3564 /*3565 * Define a new label.3566 */3567 paLabels[cLabels].off = offWhere;3568 paLabels[cLabels].enmType = enmType;3569 paLabels[cLabels].uData = uData;3570 pReNative->cLabels = cLabels + 1;3571 3572 Assert((unsigned)enmType < 64);3573 pReNative->bmLabelTypes |= RT_BIT_64(enmType);3574 3575 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)3576 {3577 Assert(uData == 0);3578 pReNative->aidxUniqueLabels[enmType] = cLabels;3579 }3580 3581 if (offWhere != UINT32_MAX)3582 {3583 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO3584 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);3585 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);3586 #endif3587 }3588 return cLabels;3589 }3590 3591 3592 /**3593 * Defines the location of an existing label.3594 *3595 * @param pReNative The native recompile state.3596 * @param idxLabel The label to 
define.
 * @param   offWhere    The position.
 */
DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
{
    AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
    PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
    AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
    pLabel->off = offWhere;
#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
    iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
#endif
}


/**
 * Looks up a label.
 *
 * @returns Label ID if found, UINT32_MAX if not.
 */
static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
                                   uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
{
    Assert((unsigned)enmType < 64);
    if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
    {
        if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
            return pReNative->aidxUniqueLabels[enmType];

        PIEMNATIVELABEL paLabels = pReNative->paLabels;
        uint32_t const  cLabels  = pReNative->cLabels;
        for (uint32_t i = 0; i < cLabels; i++)
            if (   paLabels[i].enmType == enmType
                && paLabels[i].uData   == uData
                && (   paLabels[i].off == offWhere
                    || offWhere        == UINT32_MAX
                    || paLabels[i].off == UINT32_MAX))
                return i;
    }
    return UINT32_MAX;
}


/**
 * Adds a fixup.
 *
 * @throws  VBox status code (int) on failure.
 * @param   pReNative   The native recompile state.
 * @param   offWhere    The instruction offset of the fixup location.
 * @param   idxLabel    The target label ID for the fixup.
 * @param   enmType     The fixup type.
 * @param   offAddend   Fixup addend if applicable to the type.
Default is 0.3649 */3650 DECL_HIDDEN_THROW(void)3651 iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,3652 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)3653 {3654 Assert(idxLabel <= UINT16_MAX);3655 Assert((unsigned)enmType <= UINT8_MAX);3656 #ifdef RT_ARCH_ARM643657 AssertStmt( enmType != kIemNativeFixupType_RelImm14At53658 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,3659 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));3660 #endif3661 3662 /*3663 * Make sure we've room.3664 */3665 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;3666 uint32_t const cFixups = pReNative->cFixups;3667 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))3668 { /* likely */ }3669 else3670 {3671 uint32_t cNew = pReNative->cFixupsAlloc;3672 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));3673 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));3674 cNew *= 2;3675 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));3676 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));3677 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));3678 pReNative->paFixups = paFixups;3679 pReNative->cFixupsAlloc = cNew;3680 }3681 3682 /*3683 * Add the fixup.3684 */3685 paFixups[cFixups].off = offWhere;3686 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;3687 paFixups[cFixups].enmType = enmType;3688 paFixups[cFixups].offAddend = offAddend;3689 pReNative->cFixups = cFixups + 1;3690 }3691 3692 3693 /**3694 * Slow code path for iemNativeInstrBufEnsure.3695 */3696 DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)3697 {3698 /* Double the buffer size till we meet the request. */3699 uint32_t cNew = pReNative->cInstrBufAlloc;3700 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */3701 do3702 cNew *= 2;3703 while (cNew < off + cInstrReq);3704 3705 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);3706 #ifdef RT_ARCH_ARM643707 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). 
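
/*
 * Illustrative sketch (hypothetical helper) of the forward-branch pattern the
 * label and fixup routines above are built for: create the label undefined,
 * record a fixup at the branch site, and pin the label once the target offset
 * is known; the final assembly pass then patches the branch.  The label and
 * fixup types are taken as parameters because the concrete enum values depend
 * on the emitter.
 */
static uint32_t iemNativeExampleForwardBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                              IEMNATIVELABELTYPE enmLabelType, IEMNATIVEFIXUPTYPE enmFixupType)
{
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, 0 /*uData*/);
    iemNativeAddFixup(pReNative, off, idxLabel, enmFixupType, 0 /*offAddend*/);
    off += 1;                                       /* pretend a branch instruction was emitted at 'off' */
    iemNativeLabelDefine(pReNative, idxLabel, off); /* the branch target starts here */
    return off;
}
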
*/3708 #else3709 uint32_t const cbMaxInstrBuf = _2M;3710 #endif3711 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));3712 3713 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);3714 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));3715 3716 #ifdef VBOX_STRICT3717 pReNative->offInstrBufChecked = off + cInstrReq;3718 #endif3719 pReNative->cInstrBufAlloc = cNew;3720 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;3721 }3722 3723 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO3724 3725 /**3726 * Grows the static debug info array used during recompilation.3727 *3728 * @returns Pointer to the new debug info block; throws VBox status code on3729 * failure, so no need to check the return value.3730 */3731 DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)3732 {3733 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;3734 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));3735 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));3736 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));3737 pReNative->pDbgInfo = pDbgInfo;3738 pReNative->cDbgInfoAlloc = cNew;3739 return pDbgInfo;3740 }3741 3742 3743 /**3744 * Adds a new debug info uninitialized entry, returning the pointer to it.3745 */3746 DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)3747 {3748 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))3749 { /* likely */ }3750 else3751 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);3752 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];3753 }3754 3755 3756 /**3757 * Debug Info: Adds a native offset record, if necessary.3758 */3759 DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)3760 {3761 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;3762 3763 /*3764 * Do we need this one?3765 */3766 uint32_t const offPrev = pDbgInfo->offNativeLast;3767 if (offPrev == off)3768 return;3769 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));3770 3771 /*3772 * Add it.3773 */3774 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);3775 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;3776 pEntry->NativeOffset.offNative = off;3777 pDbgInfo->offNativeLast = off;3778 }3779 3780 3781 /**3782 * Debug Info: Record info about a label.3783 */3784 static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)3785 {3786 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);3787 pEntry->Label.uType = kIemTbDbgEntryType_Label;3788 pEntry->Label.uUnused = 0;3789 pEntry->Label.enmLabel = (uint8_t)enmType;3790 pEntry->Label.uData = uData;3791 }3792 3793 3794 /**3795 * Debug Info: Record info about a threaded call.3796 */3797 static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)3798 {3799 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);3800 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;3801 pEntry->ThreadedCall.fRecompiled = fRecompiled;3802 pEntry->ThreadedCall.uUnused = 0;3803 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;3804 }3805 3806 3807 /**3808 * Debug Info: Record 
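
/*
 * A small illustrative helper (hypothetical): the usual pairing when a debug
 * info record is appended for a given native code position - first make sure
 * a native-offset record exists for 'off' (a no-op when the last recorded
 * offset already matches), then append the payload record.
 */
static void iemNativeExampleDbgNoteLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                         IEMNATIVELABELTYPE enmType, uint16_t uData)
{
#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    iemNativeDbgInfoAddNativeOffset(pReNative, off);
    iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
#else
    RT_NOREF(pReNative, off, enmType, uData);
#endif
}
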
info about a new guest instruction.3809 */3810 static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)3811 {3812 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);3813 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;3814 pEntry->GuestInstruction.uUnused = 0;3815 pEntry->GuestInstruction.fExec = fExec;3816 }3817 3818 3819 /**3820 * Debug Info: Record info about guest register shadowing.3821 */3822 DECL_HIDDEN_THROW(void)3823 iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,3824 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)3825 {3826 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);3827 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;3828 pEntry->GuestRegShadowing.uUnused = 0;3829 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;3830 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;3831 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;3832 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK3833 Assert( idxHstReg != UINT8_MAX3834 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));3835 #endif3836 }3837 3838 3839 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR3840 /**3841 * Debug Info: Record info about guest register shadowing.3842 */3843 DECL_HIDDEN_THROW(void)3844 iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,3845 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)3846 {3847 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);3848 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;3849 pEntry->GuestSimdRegShadowing.uUnused = 0;3850 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;3851 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;3852 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;3853 }3854 # endif3855 3856 3857 # ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING3858 /**3859 * Debug Info: Record info about delayed RIP updates.3860 */3861 DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)3862 {3863 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);3864 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;3865 pEntry->DelayedPcUpdate.offPc = offPc;3866 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;3867 }3868 # endif3869 3870 # if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)3871 3872 /**3873 * Debug Info: Record info about a dirty guest register.3874 */3875 DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,3876 uint8_t idxGstReg, uint8_t idxHstReg)3877 {3878 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);3879 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;3880 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 
1 : 0;3881 pEntry->GuestRegDirty.idxGstReg = idxGstReg;3882 pEntry->GuestRegDirty.idxHstReg = idxHstReg;3883 }3884 3885 3886 /**3887 * Debug Info: Record info about a dirty guest register writeback operation.3888 */3889 DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)3890 {3891 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);3892 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;3893 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;3894 pEntry->GuestRegWriteback.fGstReg = (uint32_t)fGstReg;3895 /** @todo r=aeichner Can't fit the whole register mask in the debug info entry, deal with it when it becomes necessary. */3896 Assert((uint64_t)pEntry->GuestRegWriteback.fGstReg == fGstReg);3897 }3898 3899 # endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */3900 3901 #endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */3902 3903 3904 /*********************************************************************************************************************************3905 * Register Allocator *3906 *********************************************************************************************************************************/3907 3908 /**3909 * Register parameter indexes (indexed by argument number).3910 */3911 DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =3912 {3913 IEMNATIVE_CALL_ARG0_GREG,3914 IEMNATIVE_CALL_ARG1_GREG,3915 IEMNATIVE_CALL_ARG2_GREG,3916 IEMNATIVE_CALL_ARG3_GREG,3917 #if defined(IEMNATIVE_CALL_ARG4_GREG)3918 IEMNATIVE_CALL_ARG4_GREG,3919 # if defined(IEMNATIVE_CALL_ARG5_GREG)3920 IEMNATIVE_CALL_ARG5_GREG,3921 # if defined(IEMNATIVE_CALL_ARG6_GREG)3922 IEMNATIVE_CALL_ARG6_GREG,3923 # if defined(IEMNATIVE_CALL_ARG7_GREG)3924 IEMNATIVE_CALL_ARG7_GREG,3925 # endif3926 # endif3927 # endif3928 #endif3929 };3930 AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);3931 3932 /**3933 * Call register masks indexed by argument count.3934 */3935 DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =3936 {3937 0,3938 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),3939 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),3940 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),3941 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)3942 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),3943 #if defined(IEMNATIVE_CALL_ARG4_GREG)3944 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)3945 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),3946 # if defined(IEMNATIVE_CALL_ARG5_GREG)3947 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)3948 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),3949 # if defined(IEMNATIVE_CALL_ARG6_GREG)3950 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)3951 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)3952 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),3953 # if defined(IEMNATIVE_CALL_ARG7_GREG)3954 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | 
RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)3955 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)3956 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),3957 # endif3958 # endif3959 # endif3960 #endif3961 };3962 3963 #ifdef IEMNATIVE_FP_OFF_STACK_ARG03964 /**3965 * BP offset of the stack argument slots.3966 *3967 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has3968 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.3969 */3970 DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =3971 {3972 IEMNATIVE_FP_OFF_STACK_ARG0,3973 # ifdef IEMNATIVE_FP_OFF_STACK_ARG13974 IEMNATIVE_FP_OFF_STACK_ARG1,3975 # endif3976 # ifdef IEMNATIVE_FP_OFF_STACK_ARG23977 IEMNATIVE_FP_OFF_STACK_ARG2,3978 # endif3979 # ifdef IEMNATIVE_FP_OFF_STACK_ARG33980 IEMNATIVE_FP_OFF_STACK_ARG3,3981 # endif3982 };3983 AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);3984 #endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */3985 3986 /**3987 * Info about shadowed guest register values.3988 * @see IEMNATIVEGSTREG3989 */3990 DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =3991 {3992 #define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)3993 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },3994 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },3995 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },3996 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },3997 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },3998 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },3999 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },4000 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },4001 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },4002 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },4003 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },4004 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },4005 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },4006 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },4007 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },4008 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },4009 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },4010 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },4011 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },4012 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },4013 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },4014 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },4015 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },4016 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { 
CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
    /* [kIemNativeGstReg_SegBaseFirst + 4] = */   { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base),   "fs_base", },
    /* [kIemNativeGstReg_SegBaseFirst + 5] = */   { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base),   "gs_base", },
    /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u),    "es_attrib", },
    /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u),    "cs_attrib", },
    /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u),    "ss_attrib", },
    /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u),    "ds_attrib", },
    /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u),    "fs_attrib", },
    /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u),    "gs_attrib", },
    /* [kIemNativeGstReg_SegLimitFirst + 0] = */  { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit),  "es_limit", },
    /* [kIemNativeGstReg_SegLimitFirst + 1] = */  { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit),  "cs_limit", },
    /* [kIemNativeGstReg_SegLimitFirst + 2] = */  { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit),  "ss_limit", },
    /* [kIemNativeGstReg_SegLimitFirst + 3] = */  { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit),  "ds_limit", },
    /* [kIemNativeGstReg_SegLimitFirst + 4] = */  { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit),  "fs_limit", },
    /* [kIemNativeGstReg_SegLimitFirst + 5] = */  { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit),  "gs_limit", },
    /* [kIemNativeGstReg_SegSelFirst + 0] = */    { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel),       "es", },
    /* [kIemNativeGstReg_SegSelFirst + 1] = */    { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel),       "cs", },
    /* [kIemNativeGstReg_SegSelFirst + 2] = */    { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel),       "ss", },
    /* [kIemNativeGstReg_SegSelFirst + 3] = */    { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel),       "ds", },
    /* [kIemNativeGstReg_SegSelFirst + 4] = */    { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel),       "fs", },
    /* [kIemNativeGstReg_SegSelFirst + 5] = */    { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel),       "gs", },
    /* [kIemNativeGstReg_Cr4] = */                { CPUMCTX_OFF_AND_SIZE(cr4),                 "cr4", },
    /* [kIemNativeGstReg_Xcr0] = */               { CPUMCTX_OFF_AND_SIZE(aXcr[0]),             "xcr0", },
    /* [kIemNativeGstReg_MxCsr] = */              { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR),    "mxcsr", },
    /* [kIemNativeGstReg_EFlags] = */             { CPUMCTX_OFF_AND_SIZE(eflags),              "eflags", },
#undef CPUMCTX_OFF_AND_SIZE
};
AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);


/** Host CPU general purpose register names.
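
/*
 * Usage sketch (hypothetical helper, illustrative only): the table above is
 * indexed directly by the IEMNATIVEGSTREG value to obtain the CPUMCTX field
 * offset, size and log name of a shadowed guest register.
 */
static void iemNativeExampleLogGstRegInfo(IEMNATIVEGSTREG enmGstReg)
{
    Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
    Log12(("%s: VMCPU offset %#x, %u bytes\n", g_aGstShadowInfo[enmGstReg].pszName,
           g_aGstShadowInfo[enmGstReg].off, g_aGstShadowInfo[enmGstReg].cb));
}
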
*/4047 DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =4048 {4049 #ifdef RT_ARCH_AMD644050 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"4051 #elif RT_ARCH_ARM644052 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",4053 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",4054 #else4055 # error "port me"4056 #endif4057 };4058 4059 4060 #if 0 /* unused */4061 /**4062 * Tries to locate a suitable register in the given register mask.4063 *4064 * This ASSUMES the caller has done the minimal/optimal allocation checks and4065 * failed.4066 *4067 * @returns Host register number on success, returns UINT8_MAX on failure.4068 */4069 static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)4070 {4071 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));4072 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;4073 if (fRegs)4074 {4075 /** @todo pick better here: */4076 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;4077 4078 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);4079 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)4080 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);4081 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));4082 4083 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;4084 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);4085 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;4086 return idxReg;4087 }4088 return UINT8_MAX;4089 }4090 #endif /* unused */4091 4092 4093 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK4094 /**4095 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.4096 *4097 * @returns New code buffer offset on success, UINT32_MAX on failure.4098 * @param pReNative .4099 * @param off The current code buffer position.4100 * @param enmGstReg The guest register to store to.4101 * @param idxHstReg The host register to store from.4102 */4103 DECL_FORCE_INLINE_THROW(uint32_t)4104 iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)4105 {4106 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);4107 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);4108 4109 switch (g_aGstShadowInfo[enmGstReg].cb)4110 {4111 case sizeof(uint64_t):4112 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);4113 case sizeof(uint32_t):4114 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);4115 case sizeof(uint16_t):4116 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);4117 #if 0 /* not present in the table. 
*/4118 case sizeof(uint8_t):4119 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);4120 #endif4121 default:4122 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));4123 }4124 }4125 4126 4127 /**4128 * Emits code to flush a pending write of the given guest register if any.4129 *4130 * @returns New code buffer offset.4131 * @param pReNative The native recompile state.4132 * @param off Current code buffer position.4133 * @param enmGstReg The guest register to flush.4134 */4135 DECL_HIDDEN_THROW(uint32_t)4136 iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)4137 {4138 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];4139 4140 Assert(enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast);4141 Assert( idxHstReg != UINT8_MAX4142 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));4143 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",4144 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));4145 4146 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);4147 4148 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);4149 return off;4150 }4151 4152 4153 /**4154 * Flush the given set of guest registers if marked as dirty.4155 *4156 * @returns New code buffer offset.4157 * @param pReNative The native recompile state.4158 * @param off Current code buffer position.4159 * @param fFlushGstReg The guest register set to flush (default is flush everything).4160 */4161 DECL_HIDDEN_THROW(uint32_t)4162 iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)4163 {4164 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;4165 if (bmGstRegShadowDirty)4166 {4167 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO4168 iemNativeDbgInfoAddNativeOffset(pReNative, off);4169 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);4170 # endif4171 do4172 {4173 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;4174 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);4175 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);4176 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));4177 } while (bmGstRegShadowDirty);4178 }4179 4180 return off;4181 }4182 4183 4184 /**4185 * Flush all shadowed guest registers marked as dirty for the given host register.4186 *4187 * @returns New code buffer offset.4188 * @param pReNative The native recompile state.4189 * @param off Current code buffer position.4190 * @param idxHstReg The host register.4191 *4192 * @note This doesn't do any unshadowing of guest registers from the host register.4193 */4194 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)4195 {4196 /* We need to flush any pending guest register writes this host register shadows. 
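
/*
 * The bitmask enumeration idiom used by the flush loops above, isolated for
 * illustration (hypothetical helper): walk a 64-bit dirty mask from the
 * lowest set bit upwards, clearing each bit as it is handled.
 */
static unsigned iemNativeExampleCountDirtyGstRegs(uint64_t bmGstRegShadowDirty)
{
    unsigned cDirty = 0;
    while (bmGstRegShadowDirty)
    {
        unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1; /* 1-based result -> 0-based index */
        bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
        cDirty++;
    }
    return cDirty;
}
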
*/4197 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;4198 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)4199 {4200 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO4201 iemNativeDbgInfoAddNativeOffset(pReNative, off);4202 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);4203 # endif4204 /** @todo r=bird: This is a crap way of enumerating a bitmask where we're4205 * likely to only have a single bit set. It'll be in the 0..15 range,4206 * but still it's 15 unnecessary loops for the last guest register. */4207 4208 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;4209 do4210 {4211 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;4212 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);4213 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);4214 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));4215 } while (bmGstRegShadowDirty);4216 }4217 4218 return off;4219 }4220 #endif4221 4222 4223 /**4224 * Locate a register, possibly freeing one up.4225 *4226 * This ASSUMES the caller has done the minimal/optimal allocation checks and4227 * failed.4228 *4229 * @returns Host register number on success. Returns UINT8_MAX if no registers4230 * found, the caller is supposed to deal with this and raise a4231 * allocation type specific status code (if desired).4232 *4233 * @throws VBox status code if we're run into trouble spilling a variable of4234 * recording debug info. Does NOT throw anything if we're out of4235 * registers, though.4236 */4237 static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,4238 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)4239 {4240 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);4241 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));4242 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));4243 4244 /*4245 * Try a freed register that's shadowing a guest register.4246 */4247 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;4248 if (fRegs)4249 {4250 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);4251 4252 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS4253 /*4254 * When we have livness information, we use it to kick out all shadowed4255 * guest register that will not be needed any more in this TB. If we're4256 * lucky, this may prevent us from ending up here again.4257 *4258 * Note! We must consider the previous entry here so we don't free4259 * anything that the current threaded function requires (current4260 * entry is produced by the next threaded function).4261 */4262 uint32_t const idxCurCall = pReNative->idxCurCall;4263 if (idxCurCall > 0)4264 {4265 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];4266 4267 # ifndef IEMLIVENESS_EXTENDED_LAYOUT4268 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */4269 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);4270 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */4271 #else4272 /* Construct a mask of the registers not in the read or write state.4273 Note! We could skips writes, if they aren't from us, as this is just4274 a hack to prevent trashing registers that have just been written4275 or will be written when we retire the current instruction. 
*/4276 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm644277 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm644278 & IEMLIVENESSBIT_MASK;4279 #endif4280 /* Merge EFLAGS. */4281 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */4282 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */4283 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */4284 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;4285 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);4286 4287 /* If it matches any shadowed registers. */4288 if (pReNative->Core.bmGstRegShadows & fToFreeMask)4289 {4290 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK4291 /* Writeback any dirty shadow registers we are about to unshadow. */4292 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);4293 #endif4294 4295 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);4296 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);4297 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */4298 4299 /* See if we've got any unshadowed registers we can return now. */4300 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;4301 if (fUnshadowedRegs)4302 {4303 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);4304 return (fPreferVolatile4305 ? ASMBitFirstSetU32(fUnshadowedRegs)4306 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK4307 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))4308 - 1;4309 }4310 }4311 }4312 #endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */4313 4314 unsigned const idxReg = (fPreferVolatile4315 ? ASMBitFirstSetU32(fRegs)4316 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK4317 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))4318 - 1;4319 4320 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);4321 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)4322 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);4323 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));4324 4325 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK4326 /* We need to flush any pending guest register writes this host register shadows. */4327 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);4328 #endif4329 4330 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);4331 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;4332 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;4333 return idxReg;4334 }4335 4336 /*4337 * Try free up a variable that's in a register.4338 *4339 * We do two rounds here, first evacuating variables we don't need to be4340 * saved on the stack, then in the second round move things to the stack.4341 */4342 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);4343 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)4344 {4345 uint32_t fVars = pReNative->Core.bmVars;4346 while (fVars)4347 {4348 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;4349 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;4350 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR4351 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. 
*/4352 continue;4353 #endif4354 4355 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)4356 && (RT_BIT_32(idxReg) & fRegMask)4357 && ( iLoop == 04358 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack4359 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)4360 && !pReNative->Core.aVars[idxVar].fRegAcquired)4361 {4362 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));4363 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)4364 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);4365 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));4366 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))4367 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));4368 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK4369 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));4370 #endif4371 4372 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)4373 {4374 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));4375 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);4376 }4377 4378 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;4379 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);4380 4381 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);4382 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;4383 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;4384 return idxReg;4385 }4386 fVars &= ~RT_BIT_32(idxVar);4387 }4388 }4389 4390 return UINT8_MAX;4391 }4392 4393 4394 /**4395 * Reassigns a variable to a different register specified by the caller.4396 *4397 * @returns The new code buffer position.4398 * @param pReNative The native recompile state.4399 * @param off The current code buffer position.4400 * @param idxVar The variable index.4401 * @param idxRegOld The old host register number.4402 * @param idxRegNew The new host register number.4403 * @param pszCaller The caller for logging.4404 */4405 static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,4406 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)4407 {4408 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);4409 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);4410 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR4411 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);4412 #endif4413 RT_NOREF(pszCaller);4414 4415 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK4416 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));4417 #endif4418 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);4419 4420 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;4421 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK4422 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));4423 #endif4424 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",4425 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));4426 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);4427 4428 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;4429 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;4430 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;4431 if 
(fGstRegShadows)4432 {4433 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))4434 | RT_BIT_32(idxRegNew);4435 while (fGstRegShadows)4436 {4437 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;4438 fGstRegShadows &= ~RT_BIT_64(idxGstReg);4439 4440 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);4441 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;4442 }4443 }4444 4445 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;4446 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;4447 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));4448 return off;4449 }4450 4451 4452 /**4453 * Moves a variable to a different register or spills it onto the stack.4454 *4455 * This must be a stack variable (kIemNativeVarKind_Stack) because the other4456 * kinds can easily be recreated if needed later.4457 *4458 * @returns The new code buffer position.4459 * @param pReNative The native recompile state.4460 * @param off The current code buffer position.4461 * @param idxVar The variable index.4462 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to4463 * call-volatile registers.4464 */4465 DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,4466 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)4467 {4468 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);4469 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];4470 Assert(pVar->enmKind == kIemNativeVarKind_Stack);4471 Assert(!pVar->fRegAcquired);4472 4473 uint8_t const idxRegOld = pVar->idxReg;4474 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));4475 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));4476 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);4477 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)4478 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);4479 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));4480 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))4481 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));4482 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK4483 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));4484 #endif4485 4486 4487 /** @todo Add statistics on this.*/4488 /** @todo Implement basic variable liveness analysis (python) so variables4489 * can be freed immediately once no longer used. This has the potential to4490 * be trashing registers and stack for dead variables.4491 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */4492 4493 /*4494 * First try move it to a different register, as that's cheaper.4495 */4496 fForbiddenRegs |= RT_BIT_32(idxRegOld);4497 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;4498 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;4499 if (fRegs)4500 {4501 /* Avoid using shadow registers, if possible. 
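
/*
 * Illustrative predicate (hypothetical, simplified - the real code also adds
 * the old register and the fixed-register mask to the forbidden set): the
 * move-or-spill policy above boils down to this check - relocate the variable
 * if any allowed host register is still free, otherwise spill it to its
 * bp-relative stack slot.
 */
static bool iemNativeExampleCanMoveInsteadOfSpill(PIEMRECOMPILERSTATE pReNative, uint32_t fForbiddenRegs)
{
    uint32_t const fFreeRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs & IEMNATIVE_HST_GREG_MASK;
    return fFreeRegs != 0;
}
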
*/4502 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)4503 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;4504 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;4505 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");4506 }4507 4508 /*4509 * Otherwise we must spill the register onto the stack.4510 */4511 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);4512 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",4513 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));4514 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);4515 4516 pVar->idxReg = UINT8_MAX;4517 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);4518 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);4519 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;4520 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;4521 return off;4522 }4523 4524 4525 /**4526 * Allocates a temporary host general purpose register.4527 *4528 * This may emit code to save register content onto the stack in order to free4529 * up a register.4530 *4531 * @returns The host register number; throws VBox status code on failure,4532 * so no need to check the return value.4533 * @param pReNative The native recompile state.4534 * @param poff Pointer to the variable with the code buffer position.4535 * This will be update if we need to move a variable from4536 * register to stack in order to satisfy the request.4537 * @param fPreferVolatile Whether to prefer volatile over non-volatile4538 * registers (@c true, default) or the other way around4539 * (@c false, for iemNativeRegAllocTmpForGuestReg()).4540 */4541 DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)4542 {4543 /*4544 * Try find a completely unused register, preferably a call-volatile one.4545 */4546 uint8_t idxReg;4547 uint32_t fRegs = ~pReNative->Core.bmHstRegs4548 & ~pReNative->Core.bmHstRegsWithGstShadow4549 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);4550 if (fRegs)4551 {4552 if (fPreferVolatile)4553 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK4554 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;4555 else4556 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK4557 ? 
fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;4558 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);4559 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));4560 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));4561 }4562 else4563 {4564 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);4565 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));4566 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));4567 }4568 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);4569 }4570 4571 4572 /**4573 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable4574 * registers.4575 *4576 * @returns The host register number; throws VBox status code on failure,4577 * so no need to check the return value.4578 * @param pReNative The native recompile state.4579 * @param poff Pointer to the variable with the code buffer position.4580 * This will be update if we need to move a variable from4581 * register to stack in order to satisfy the request.4582 * @param fRegMask Mask of acceptable registers.4583 * @param fPreferVolatile Whether to prefer volatile over non-volatile4584 * registers (@c true, default) or the other way around4585 * (@c false, for iemNativeRegAllocTmpForGuestReg()).4586 */4587 DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,4588 bool fPreferVolatile /*= true*/)4589 {4590 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));4591 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));4592 4593 /*4594 * Try find a completely unused register, preferably a call-volatile one.4595 */4596 uint8_t idxReg;4597 uint32_t fRegs = ~pReNative->Core.bmHstRegs4598 & ~pReNative->Core.bmHstRegsWithGstShadow4599 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)4600 & fRegMask;4601 if (fRegs)4602 {4603 if (fPreferVolatile)4604 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK4605 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;4606 else4607 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK4608 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;4609 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);4610 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));4611 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));4612 }4613 else4614 {4615 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);4616 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));4617 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));4618 }4619 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);4620 }4621 4622 4623 /**4624 * Allocates a temporary register for loading an immediate value into.4625 *4626 * This will emit code to load the immediate, unless there happens to be an4627 * unused register with the value already loaded.4628 *4629 * The caller will not modify the returned register, it must be considered4630 * read-only. 
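
/*
 * The register picking preference used by the allocators above, shown in
 * isolation (hypothetical helper): prefer a call-volatile register when the
 * value does not need to survive helper calls, otherwise prefer a
 * callee-saved one, falling back to whatever is free.
 */
static uint8_t iemNativeExamplePickFreeReg(uint32_t fFreeRegs, bool fPreferVolatile)
{
    Assert(fFreeRegs);
    uint32_t const fPreferred = fPreferVolatile
                              ? fFreeRegs &  IEMNATIVE_CALL_VOLATILE_GREG_MASK
                              : fFreeRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    return (uint8_t)(ASMBitFirstSetU32(fPreferred ? fPreferred : fFreeRegs) - 1);
}
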
Free using iemNativeRegFreeTmpImm.
 *
 * @returns The host register number; throws VBox status code on failure, so no
 *          need to check the return value.
 * @param   pReNative       The native recompile state.
 * @param   poff            Pointer to the variable with the code buffer position.
 * @param   uImm            The immediate value that the register must hold upon
 *                          return.
 * @param   fPreferVolatile Whether to prefer volatile over non-volatile
 *                          registers (@c true, default) or the other way around
 *                          (@c false).
 *
 * @note    Reusing immediate values has not been implemented yet.
 */
DECL_HIDDEN_THROW(uint8_t)
iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
{
    uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
    *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
    return idxReg;
}


/**
 * Allocates a temporary host general purpose register for keeping a guest
 * register value.
 *
 * Since we may already have a register holding the guest register value,
 * code will be emitted to do the loading if that's not the case. Code may also
 * be emitted if we have to free up a register to satisfy the request.
 *
 * @returns The host register number; throws VBox status code on failure, so no
 *          need to check the return value.
 * @param   pReNative       The native recompile state.
 * @param   poff            Pointer to the variable with the code buffer
 *                          position. This will be updated if we need to move a
 *                          variable from register to stack in order to satisfy
 *                          the request.
 * @param   enmGstReg       The guest register that is to be updated.
 * @param   enmIntendedUse  How the caller will be using the host register.
 * @param   fNoVolatileRegs Set if no volatile register allowed, clear if any
 *                          register is okay (default).  The ASSUMPTION here is
 *                          that the caller has already flushed all volatile
 *                          registers, so this is only applied if we allocate a
 *                          new register.
 * @param   fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
 * @sa      iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
 */
DECL_HIDDEN_THROW(uint8_t)
iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
                                IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
                                bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
{
    Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
    AssertMsg(   fSkipLivenessAssert
              || pReNative->idxCurCall == 0
              || enmGstReg == kIemNativeGstReg_Pc
              || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
                  ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
                  : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
                  ?
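    /* Illustrative sketch only (not executed here): how emitter code typically
       combines the allocators defined above -- shadow a guest GPR for update,
       grab a scratch register and a constant, then release everything with the
       free routines defined further down in this file.  The guest register and
       the immediate value are made-up example inputs. */
#if 0
    uint8_t const idxGstHstReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_GprFirst,
                                                                 kIemNativeGstRegUse_ForUpdate);
    uint8_t const idxTmpReg    = iemNativeRegAllocTmp(pReNative, &off);
    uint8_t const idxImmReg    = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
    /* ... emit the actual calculation using the three host registers ... */
    iemNativeRegFreeTmpImm(pReNative, idxImmReg);
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
    iemNativeRegFreeTmp(pReNative, idxGstHstReg);
#endif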
IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))4692 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),4693 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));4694 #endif4695 RT_NOREF(fSkipLivenessAssert);4696 #if defined(LOG_ENABLED) || defined(VBOX_STRICT)4697 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };4698 #endif4699 uint32_t const fRegMask = !fNoVolatileRegs4700 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK4701 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;4702 4703 /*4704 * First check if the guest register value is already in a host register.4705 */4706 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))4707 {4708 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];4709 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));4710 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));4711 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));4712 4713 /* It's not supposed to be allocated... */4714 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))4715 {4716 /*4717 * If the register will trash the guest shadow copy, try find a4718 * completely unused register we can use instead. If that fails,4719 * we need to disassociate the host reg from the guest reg.4720 */4721 /** @todo would be nice to know if preserving the register is in any way helpful. */4722 /* If the purpose is calculations, try duplicate the register value as4723 we'll be clobbering the shadow. */4724 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation4725 && ( ~pReNative->Core.bmHstRegs4726 & ~pReNative->Core.bmHstRegsWithGstShadow4727 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))4728 {4729 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);4730 4731 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);4732 4733 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",4734 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,4735 g_apszIemNativeHstRegNames[idxRegNew]));4736 idxReg = idxRegNew;4737 }4738 /* If the current register matches the restrictions, go ahead and allocate4739 it for the caller. */4740 else if (fRegMask & RT_BIT_32(idxReg))4741 {4742 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);4743 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;4744 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;4745 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)4746 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",4747 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));4748 else4749 {4750 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);4751 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",4752 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));4753 }4754 }4755 /* Otherwise, allocate a register that satisfies the caller and transfer4756 the shadowing if compatible with the intended use. (This basically4757 means the call wants a non-volatile register (RSP push/pop scenario).) 
*/4758 else4759 {4760 Assert(fNoVolatileRegs);4761 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),4762 !fNoVolatileRegs4763 && enmIntendedUse == kIemNativeGstRegUse_Calculation);4764 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);4765 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)4766 {4767 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);4768 Log12(("iemNativeRegAllocTmpForGuestReg: Transfering %s to %s for guest %s %s\n",4769 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],4770 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));4771 }4772 else4773 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",4774 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,4775 g_apszIemNativeHstRegNames[idxRegNew]));4776 idxReg = idxRegNew;4777 }4778 }4779 else4780 {4781 /*4782 * Oops. Shadowed guest register already allocated!4783 *4784 * Allocate a new register, copy the value and, if updating, the4785 * guest shadow copy assignment to the new register.4786 */4787 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate4788 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,4789 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",4790 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));4791 4792 /** @todo share register for readonly access. */4793 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,4794 enmIntendedUse == kIemNativeGstRegUse_Calculation);4795 4796 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)4797 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);4798 4799 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate4800 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)4801 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",4802 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,4803 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));4804 else4805 {4806 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);4807 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",4808 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,4809 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));4810 }4811 idxReg = idxRegNew;4812 }4813 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */4814 4815 #ifdef VBOX_STRICT4816 /* Strict builds: Check that the value is correct. */4817 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);4818 #endif4819 4820 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK4821 /** @todo r=aeichner Implement for registers other than GPR as well. 
*/4822 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite4823 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)4824 && enmGstReg >= kIemNativeGstReg_GprFirst4825 && enmGstReg <= kIemNativeGstReg_GprLast4826 )4827 {4828 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO4829 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);4830 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);4831 # endif4832 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);4833 }4834 #endif4835 4836 return idxReg;4837 }4838 4839 /*4840 * Allocate a new register, load it with the guest value and designate it as a copy of the4841 */4842 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);4843 4844 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)4845 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);4846 4847 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)4848 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);4849 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",4850 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));4851 4852 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK4853 /** @todo r=aeichner Implement for registers other than GPR as well. */4854 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite4855 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)4856 && enmGstReg >= kIemNativeGstReg_GprFirst4857 && enmGstReg <= kIemNativeGstReg_GprLast4858 )4859 {4860 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO4861 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);4862 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);4863 # endif4864 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);4865 }4866 #endif4867 4868 return idxRegNew;4869 }4870 4871 4872 /**4873 * Allocates a temporary host general purpose register that already holds the4874 * given guest register value.4875 *4876 * The use case for this function is places where the shadowing state cannot be4877 * modified due to branching and such. This will fail if the we don't have a4878 * current shadow copy handy or if it's incompatible. 
The only code that will4879 * be emitted here is value checking code in strict builds.4880 *4881 * The intended use can only be readonly!4882 *4883 * @returns The host register number, UINT8_MAX if not present.4884 * @param pReNative The native recompile state.4885 * @param poff Pointer to the instruction buffer offset.4886 * Will be updated in strict builds if a register is4887 * found.4888 * @param enmGstReg The guest register that will is to be updated.4889 * @note In strict builds, this may throw instruction buffer growth failures.4890 * Non-strict builds will not throw anything.4891 * @sa iemNativeRegAllocTmpForGuestReg4892 */4893 DECL_HIDDEN_THROW(uint8_t)4894 iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)4895 {4896 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);4897 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS4898 AssertMsg( pReNative->idxCurCall == 04899 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))4900 || enmGstReg == kIemNativeGstReg_Pc,4901 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));4902 #endif4903 4904 /*4905 * First check if the guest register value is already in a host register.4906 */4907 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))4908 {4909 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];4910 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));4911 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));4912 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));4913 4914 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))4915 {4916 /*4917 * We only do readonly use here, so easy compared to the other4918 * variant of this code.4919 */4920 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);4921 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;4922 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;4923 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",4924 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));4925 4926 #ifdef VBOX_STRICT4927 /* Strict builds: Check that the value is correct. 
*/4928 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);4929 #else4930 RT_NOREF(poff);4931 #endif4932 return idxReg;4933 }4934 }4935 4936 return UINT8_MAX;4937 }4938 4939 4940 /**4941 * Allocates argument registers for a function call.4942 *4943 * @returns New code buffer offset on success; throws VBox status code on failure, so no4944 * need to check the return value.4945 * @param pReNative The native recompile state.4946 * @param off The current code buffer offset.4947 * @param cArgs The number of arguments the function call takes.4948 */4949 DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)4950 {4951 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,4952 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));4953 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);4954 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);4955 4956 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))4957 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);4958 else if (cArgs == 0)4959 return true;4960 4961 /*4962 * Do we get luck and all register are free and not shadowing anything?4963 */4964 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)4965 for (uint32_t i = 0; i < cArgs; i++)4966 {4967 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];4968 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;4969 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;4970 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);4971 }4972 /*4973 * Okay, not lucky so we have to free up the registers.4974 */4975 else4976 for (uint32_t i = 0; i < cArgs; i++)4977 {4978 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];4979 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))4980 {4981 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)4982 {4983 case kIemNativeWhat_Var:4984 {4985 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;4986 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);4987 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),4988 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));4989 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);4990 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR4991 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);4992 #endif4993 4994 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)4995 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;4996 else4997 {4998 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);4999 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));5000 }5001 break;5002 }5003 5004 case kIemNativeWhat_Tmp:5005 case kIemNativeWhat_Arg:5006 case kIemNativeWhat_rc:5007 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));5008 default:5009 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));5010 }5011 5012 }5013 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))5014 {5015 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);5016 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)5017 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);5018 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK5019 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & 
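            /* I.e. with delayed register writeback enabled, a call argument register
               must not hold the only (still dirty) copy of a guest register at this
               point; any pending guest register writes are expected to have been
               flushed before the argument registers are claimed. */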
pReNative->Core.bmGstRegShadowDirty));5020 #endif5021 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);5022 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;5023 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;5024 }5025 else5026 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);5027 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;5028 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;5029 }5030 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];5031 return true;5032 }5033 5034 5035 DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);5036 5037 5038 #if 05039 /**5040 * Frees a register assignment of any type.5041 *5042 * @param pReNative The native recompile state.5043 * @param idxHstReg The register to free.5044 *5045 * @note Does not update variables.5046 */5047 DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT5048 {5049 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));5050 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));5051 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));5052 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var5053 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp5054 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg5055 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);5056 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var5057 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX5058 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));5059 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)5060 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);5061 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))5062 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));5063 5064 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);5065 /* no flushing, right:5066 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);5067 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;5068 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;5069 */5070 }5071 #endif5072 5073 5074 /**5075 * Frees a temporary register.5076 *5077 * Any shadow copies of guest registers assigned to the host register will not5078 * be flushed by this operation.5079 */5080 DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT5081 {5082 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));5083 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);5084 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);5085 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",5086 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));5087 }5088 5089 5090 /**5091 * Frees a temporary immediate register.5092 *5093 * It is assumed that the call has not modified the register, so it still hold5094 * the same value as when it was allocated via iemNativeRegAllocTmpImm().5095 */5096 DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT5097 {5098 iemNativeRegFreeTmp(pReNative, idxHstReg);5099 }5100 5101 5102 /**5103 * Frees a register assigned to a variable.5104 *5105 * The register will be 
disassociated from the variable.5106 */5107 DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT5108 {5109 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));5110 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);5111 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;5112 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);5113 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);5114 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR5115 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);5116 #endif5117 5118 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;5119 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);5120 if (!fFlushShadows)5121 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",5122 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));5123 else5124 {5125 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);5126 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;5127 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK5128 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));5129 #endif5130 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;5131 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;5132 uint64_t fGstRegShadows = fGstRegShadowsOld;5133 while (fGstRegShadows)5134 {5135 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;5136 fGstRegShadows &= ~RT_BIT_64(idxGstReg);5137 5138 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);5139 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;5140 }5141 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",5142 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));5143 }5144 }5145 5146 5147 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR5148 # ifdef LOG_ENABLED5149 /** Host CPU SIMD register names. 
*/5150 DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =5151 {5152 # ifdef RT_ARCH_AMD645153 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"5154 # elif RT_ARCH_ARM645155 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",5156 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",5157 # else5158 # error "port me"5159 # endif5160 };5161 # endif5162 5163 5164 /**5165 * Frees a SIMD register assigned to a variable.5166 *5167 * The register will be disassociated from the variable.5168 */5169 DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT5170 {5171 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));5172 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);5173 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;5174 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);5175 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);5176 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);5177 5178 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;5179 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);5180 if (!fFlushShadows)5181 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",5182 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));5183 else5184 {5185 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);5186 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;5187 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;5188 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;5189 uint64_t fGstRegShadows = fGstRegShadowsOld;5190 while (fGstRegShadows)5191 {5192 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;5193 fGstRegShadows &= ~RT_BIT_64(idxGstReg);5194 5195 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);5196 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;5197 }5198 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",5199 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));5200 }5201 }5202 5203 5204 /**5205 * Reassigns a variable to a different SIMD register specified by the caller.5206 *5207 * @returns The new code buffer position.5208 * @param pReNative The native recompile state.5209 * @param off The current code buffer position.5210 * @param idxVar The variable index.5211 * @param idxRegOld The old host register number.5212 * @param idxRegNew The new host register number.5213 * @param pszCaller The caller for logging.5214 */5215 static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,5216 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)5217 {5218 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);5219 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);5220 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);5221 RT_NOREF(pszCaller);5222 5223 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)5224 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));5225 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, 
idxRegNew, off);5226 5227 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;5228 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)5229 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));5230 5231 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",5232 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));5233 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);5234 5235 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))5236 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);5237 else5238 {5239 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));5240 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);5241 }5242 5243 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;5244 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;5245 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;5246 if (fGstRegShadows)5247 {5248 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))5249 | RT_BIT_32(idxRegNew);5250 while (fGstRegShadows)5251 {5252 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;5253 fGstRegShadows &= ~RT_BIT_64(idxGstReg);5254 5255 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);5256 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;5257 }5258 }5259 5260 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;5261 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;5262 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));5263 return off;5264 }5265 5266 5267 /**5268 * Moves a variable to a different register or spills it onto the stack.5269 *5270 * This must be a stack variable (kIemNativeVarKind_Stack) because the other5271 * kinds can easily be recreated if needed later.5272 *5273 * @returns The new code buffer position.5274 * @param pReNative The native recompile state.5275 * @param off The current code buffer position.5276 * @param idxVar The variable index.5277 * @param fForbiddenRegs Mask of the forbidden registers. 
Defaults to5278 * call-volatile registers.5279 */5280 DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,5281 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)5282 {5283 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);5284 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];5285 Assert(pVar->enmKind == kIemNativeVarKind_Stack);5286 Assert(!pVar->fRegAcquired);5287 Assert(!pVar->fSimdReg);5288 5289 uint8_t const idxRegOld = pVar->idxReg;5290 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));5291 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));5292 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);5293 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)5294 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);5295 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));5296 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))5297 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));5298 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)5299 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));5300 5301 /** @todo Add statistics on this.*/5302 /** @todo Implement basic variable liveness analysis (python) so variables5303 * can be freed immediately once no longer used. This has the potential to5304 * be trashing registers and stack for dead variables.5305 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */5306 5307 /*5308 * First try move it to a different register, as that's cheaper.5309 */5310 fForbiddenRegs |= RT_BIT_32(idxRegOld);5311 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;5312 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;5313 if (fRegs)5314 {5315 /* Avoid using shadow registers, if possible. 
*/5316 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)5317 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;5318 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;5319 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");5320 }5321 5322 /*5323 * Otherwise we must spill the register onto the stack.5324 */5325 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);5326 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",5327 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));5328 5329 if (pVar->cbVar == sizeof(RTUINT128U))5330 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);5331 else5332 {5333 Assert(pVar->cbVar == sizeof(RTUINT256U));5334 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);5335 }5336 5337 pVar->idxReg = UINT8_MAX;5338 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);5339 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);5340 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;5341 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;5342 return off;5343 }5344 5345 5346 /**5347 * Called right before emitting a call instruction to move anything important5348 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,5349 * optionally freeing argument variables.5350 *5351 * @returns New code buffer offset, UINT32_MAX on failure.5352 * @param pReNative The native recompile state.5353 * @param off The code buffer offset.5354 * @param cArgs The number of arguments the function call takes.5355 * It is presumed that the host register part of these have5356 * been allocated as such already and won't need moving,5357 * just freeing.5358 * @param fKeepVars Mask of variables that should keep their register5359 * assignments. Caller must take care to handle these.5360 */5361 DECL_HIDDEN_THROW(uint32_t)5362 iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)5363 {5364 Assert(!cArgs); RT_NOREF(cArgs);5365 5366 /* fKeepVars will reduce this mask. 
*/5367 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;5368 5369 /*5370 * Move anything important out of volatile registers.5371 */5372 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK5373 #ifdef IEMNATIVE_SIMD_REG_FIXED_TMP05374 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)5375 #endif5376 ;5377 5378 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;5379 if (!fSimdRegsToMove)5380 { /* likely */ }5381 else5382 {5383 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));5384 while (fSimdRegsToMove != 0)5385 {5386 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;5387 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);5388 5389 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)5390 {5391 case kIemNativeWhat_Var:5392 {5393 uint8_t const idxVar = pReNative->Core.aHstRegs[idxSimdReg].idxVar;5394 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);5395 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];5396 Assert(pVar->idxReg == idxSimdReg);5397 Assert(pVar->fSimdReg);5398 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))5399 {5400 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",5401 idxVar, pVar->enmKind, pVar->idxReg));5402 if (pVar->enmKind != kIemNativeVarKind_Stack)5403 pVar->idxReg = UINT8_MAX;5404 else5405 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);5406 }5407 else5408 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);5409 continue;5410 }5411 5412 case kIemNativeWhat_Arg:5413 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));5414 continue;5415 5416 case kIemNativeWhat_rc:5417 case kIemNativeWhat_Tmp:5418 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));5419 continue;5420 5421 case kIemNativeWhat_FixedReserved:5422 #ifdef RT_ARCH_ARM645423 continue; /* On ARM the upper half of the virtual 256-bit register. */5424 #endif5425 5426 case kIemNativeWhat_FixedTmp:5427 case kIemNativeWhat_pVCpuFixed:5428 case kIemNativeWhat_pCtxFixed:5429 case kIemNativeWhat_PcShadow:5430 case kIemNativeWhat_Invalid:5431 case kIemNativeWhat_End:5432 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));5433 }5434 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));5435 }5436 }5437 5438 /*5439 * Do the actual freeing.5440 */5441 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)5442 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",5443 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));5444 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;5445 5446 /* If there are guest register shadows in any call-volatile register, we5447 have to clear the corrsponding guest register masks for each register. 
*/5448 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;5449 if (fHstSimdRegsWithGstShadow)5450 {5451 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",5452 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));5453 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;5454 do5455 {5456 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;5457 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);5458 5459 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));5460 5461 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK5462 /*5463 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply5464 * to call volatile registers).5465 */5466 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)5467 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)5468 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);5469 #endif5470 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)5471 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));5472 5473 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;5474 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;5475 } while (fHstSimdRegsWithGstShadow != 0);5476 }5477 5478 return off;5479 }5480 #endif5481 5482 5483 /**5484 * Called right before emitting a call instruction to move anything important5485 * out of call-volatile registers, free and flush the call-volatile registers,5486 * optionally freeing argument variables.5487 *5488 * @returns New code buffer offset, UINT32_MAX on failure.5489 * @param pReNative The native recompile state.5490 * @param off The code buffer offset.5491 * @param cArgs The number of arguments the function call takes.5492 * It is presumed that the host register part of these have5493 * been allocated as such already and won't need moving,5494 * just freeing.5495 * @param fKeepVars Mask of variables that should keep their register5496 * assignments. Caller must take care to handle these.5497 */5498 DECL_HIDDEN_THROW(uint32_t)5499 iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)5500 {5501 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);5502 5503 /* fKeepVars will reduce this mask. 
*/5504 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;5505 5506 /*5507 * Move anything important out of volatile registers.5508 */5509 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))5510 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);5511 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK5512 #ifdef IEMNATIVE_REG_FIXED_TMP05513 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)5514 #endif5515 #ifdef IEMNATIVE_REG_FIXED_TMP15516 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)5517 #endif5518 #ifdef IEMNATIVE_REG_FIXED_PC_DBG5519 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)5520 #endif5521 & ~g_afIemNativeCallRegs[cArgs];5522 5523 fRegsToMove &= pReNative->Core.bmHstRegs;5524 if (!fRegsToMove)5525 { /* likely */ }5526 else5527 {5528 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));5529 while (fRegsToMove != 0)5530 {5531 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;5532 fRegsToMove &= ~RT_BIT_32(idxReg);5533 5534 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)5535 {5536 case kIemNativeWhat_Var:5537 {5538 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;5539 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);5540 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];5541 Assert(pVar->idxReg == idxReg);5542 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR5543 Assert(!pVar->fSimdReg);5544 #endif5545 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))5546 {5547 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",5548 idxVar, pVar->enmKind, pVar->idxReg));5549 if (pVar->enmKind != kIemNativeVarKind_Stack)5550 pVar->idxReg = UINT8_MAX;5551 else5552 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);5553 }5554 else5555 fRegsToFree &= ~RT_BIT_32(idxReg);5556 continue;5557 }5558 5559 case kIemNativeWhat_Arg:5560 AssertMsgFailed(("What?!?: %u\n", idxReg));5561 continue;5562 5563 case kIemNativeWhat_rc:5564 case kIemNativeWhat_Tmp:5565 AssertMsgFailed(("Missing free: %u\n", idxReg));5566 continue;5567 5568 case kIemNativeWhat_FixedTmp:5569 case kIemNativeWhat_pVCpuFixed:5570 case kIemNativeWhat_pCtxFixed:5571 case kIemNativeWhat_PcShadow:5572 case kIemNativeWhat_FixedReserved:5573 case kIemNativeWhat_Invalid:5574 case kIemNativeWhat_End:5575 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));5576 }5577 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));5578 }5579 }5580 5581 /*5582 * Do the actual freeing.5583 */5584 if (pReNative->Core.bmHstRegs & fRegsToFree)5585 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",5586 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));5587 pReNative->Core.bmHstRegs &= ~fRegsToFree;5588 5589 /* If there are guest register shadows in any call-volatile register, we5590 have to clear the corrsponding guest register masks for each register. 
*/5591 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;5592 if (fHstRegsWithGstShadow)5593 {5594 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",5595 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));5596 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;5597 do5598 {5599 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;5600 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);5601 5602 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));5603 5604 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK5605 /*5606 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply5607 * to call volatile registers).5608 */5609 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)5610 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);5611 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));5612 #endif5613 5614 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;5615 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;5616 } while (fHstRegsWithGstShadow != 0);5617 }5618 5619 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR5620 /* Now for the SIMD registers, no argument support for now. */5621 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);5622 #endif5623 5624 return off;5625 }5626 5627 5628 /**5629 * Flushes a set of guest register shadow copies.5630 *5631 * This is usually done after calling a threaded function or a C-implementation5632 * of an instruction.5633 *5634 * @param pReNative The native recompile state.5635 * @param fGstRegs Set of guest registers to flush.5636 */5637 DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT5638 {5639 /*5640 * Reduce the mask by what's currently shadowed5641 */5642 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;5643 fGstRegs &= bmGstRegShadowsOld;5644 if (fGstRegs)5645 {5646 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;5647 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));5648 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;5649 if (bmGstRegShadowsNew)5650 {5651 /*5652 * Partial.5653 */5654 do5655 {5656 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;5657 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];5658 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));5659 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));5660 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));5661 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK5662 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));5663 #endif5664 5665 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);5666 fGstRegs &= ~fInThisHstReg;5667 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;5668 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;5669 if (!fGstRegShadowsNew)5670 pReNative->Core.bmHstRegsWithGstShadow &= 
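            /* Illustrative sketch only: the usual bracketing of a helper call as seen
               from an emitter -- evacuate and flush the call-volatile registers first,
               emit the call, then drop the shadow copies the helper may have made
               stale.  The argument count and the all-ones mask are example values. */
#if 0
            off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4 /*cArgs*/);
            /* ... load the argument registers and emit the actual call here ... */
            iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX /*fGstRegs*/);
#endif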
~RT_BIT_32(idxHstReg);5671 } while (fGstRegs != 0);5672 }5673 else5674 {5675 /*5676 * Clear all.5677 */5678 do5679 {5680 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;5681 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];5682 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));5683 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));5684 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));5685 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK5686 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));5687 #endif5688 5689 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));5690 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;5691 } while (fGstRegs != 0);5692 pReNative->Core.bmHstRegsWithGstShadow = 0;5693 }5694 }5695 }5696 5697 5698 /**5699 * Flushes guest register shadow copies held by a set of host registers.5700 *5701 * This is used with the TLB lookup code for ensuring that we don't carry on5702 * with any guest shadows in volatile registers, as these will get corrupted by5703 * a TLB miss.5704 *5705 * @param pReNative The native recompile state.5706 * @param fHstRegs Set of host registers to flush guest shadows for.5707 */5708 DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT5709 {5710 /*5711 * Reduce the mask by what's currently shadowed.5712 */5713 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;5714 fHstRegs &= bmHstRegsWithGstShadowOld;5715 if (fHstRegs)5716 {5717 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;5718 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",5719 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));5720 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;5721 if (bmHstRegsWithGstShadowNew)5722 {5723 /*5724 * Partial (likely).5725 */5726 uint64_t fGstShadows = 0;5727 do5728 {5729 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;5730 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));5731 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)5732 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);5733 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK5734 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));5735 #endif5736 5737 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;5738 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;5739 fHstRegs &= ~RT_BIT_32(idxHstReg);5740 } while (fHstRegs != 0);5741 pReNative->Core.bmGstRegShadows &= ~fGstShadows;5742 }5743 else5744 {5745 /*5746 * Clear all.5747 */5748 do5749 {5750 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;5751 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));5752 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)5753 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);5754 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK5755 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));5756 #endif5757 5758 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;5759 fHstRegs &= ~RT_BIT_32(idxHstReg);5760 } while (fHstRegs != 0);5761 pReNative->Core.bmGstRegShadows = 0;5762 }5763 }5764 }5765 5766 5767 /**5768 * Restores 
guest shadow copies in volatile registers.
 *
 * This is used after calling a helper function (think TLB miss) to restore the
 * register state of volatile registers.
 *
 * @param   pReNative               The native recompile state.
 * @param   off                     The code buffer offset.
 * @param   fHstRegsActiveShadows   Set of host registers which are allowed to
 *                                  be active (allocated) w/o asserting. Hack.
 * @see     iemNativeVarSaveVolatileRegsPreHlpCall(),
 *          iemNativeVarRestoreVolatileRegsPostHlpCall()
 */
DECL_HIDDEN_THROW(uint32_t)
iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
{
    uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    if (fHstRegs)
    {
        Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
        do
        {
            unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;

            /* It's not fatal if a register is active holding a variable that is
               shadowing a guest register, ASSUMING all pending guest register
               writes were flushed prior to the helper call.  However, we'll be
               emitting duplicate restores, so it wastes code space. */
            Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
            RT_NOREF(fHstRegsActiveShadows);

            uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
            Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
#endif
            Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
            AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
                       IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));

            unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
            off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);

            fHstRegs &= ~RT_BIT_32(idxHstReg);
        } while (fHstRegs != 0);
    }
    return off;
}




/*********************************************************************************************************************************
*   SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge)                           *
*********************************************************************************************************************************/
#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR

/**
 * Info about shadowed guest SIMD register values.
 * @see IEMNATIVEGSTSIMDREG
 */
static struct
{
    /** Offset in VMCPU of XMM (low 128-bit) registers. */
    uint32_t    offXmm;
    /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
    uint32_t    offYmm;
    /** Name (for logging).
*/5834 const char *pszName;5835 } const g_aGstSimdShadowInfo[] =5836 {5837 #define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \5838 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])5839 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },5840 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },5841 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },5842 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },5843 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },5844 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },5845 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },5846 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },5847 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },5848 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },5849 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },5850 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },5851 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },5852 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },5853 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },5854 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },5855 #undef CPUMCTX_OFF_AND_SIZE5856 };5857 AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);5858 5859 5860 /**5861 * Frees a temporary SIMD register.5862 *5863 * Any shadow copies of guest registers assigned to the host register will not5864 * be flushed by this operation.5865 */5866 DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT5867 {5868 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));5869 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);5870 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);5871 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",5872 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));5873 }5874 5875 5876 /**5877 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.5878 *5879 * @returns New code bufferoffset.5880 * @param pReNative The native recompile state.5881 * @param off Current code buffer position.5882 * @param enmGstSimdReg The guest SIMD register to flush.5883 */5884 DECL_HIDDEN_THROW(uint32_t)5885 iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)5886 {5887 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];5888 5889 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",5890 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],5891 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),5892 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));5893 5894 if 
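    /* The two halves are written back separately below: a dirty low 128 bits go to
       the XMM area (offXmm in g_aGstSimdShadowInfo) and dirty high 128 bits to the
       YmmHi area (offYmm), so a register that only modified one half costs a
       single 16-byte store. */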
(IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))5895 {5896 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_2565897 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);5898 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);5899 }5900 5901 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))5902 {5903 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_2565904 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);5905 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);5906 }5907 5908 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);5909 return off;5910 }5911 5912 5913 /**5914 * Flush the given set of guest SIMD registers if marked as dirty.5915 *5916 * @returns New code buffer offset.5917 * @param pReNative The native recompile state.5918 * @param off Current code buffer position.5919 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).5920 */5921 DECL_HIDDEN_THROW(uint32_t)5922 iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)5923 {5924 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)5925 & fFlushGstSimdReg;5926 if (bmGstSimdRegShadowDirty)5927 {5928 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO5929 iemNativeDbgInfoAddNativeOffset(pReNative, off);5930 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);5931 # endif5932 5933 do5934 {5935 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;5936 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);5937 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));5938 } while (bmGstSimdRegShadowDirty);5939 }5940 5941 return off;5942 }5943 5944 5945 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK5946 /**5947 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.5948 *5949 * @returns New code buffer offset.5950 * @param pReNative The native recompile state.5951 * @param off Current code buffer position.5952 * @param idxHstSimdReg The host SIMD register.5953 *5954 * @note This doesn't do any unshadowing of guest registers from the host register.5955 */5956 DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)5957 {5958 /* We need to flush any pending guest register writes this host register shadows. 
 */
    uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
                                     & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
    if (bmGstSimdRegShadowDirty)
    {
# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
        iemNativeDbgInfoAddNativeOffset(pReNative, off);
        iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
# endif

        do
        {
            unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
            bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
            off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
            Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
        } while (bmGstSimdRegShadowDirty);
    }

    return off;
}
#endif


/**
 * Locate a register, possibly freeing one up.
 *
 * This ASSUMES the caller has done the minimal/optimal allocation checks and
 * failed.
 *
 * @returns Host register number on success.  Returns UINT8_MAX if no registers
 *          found, the caller is supposed to deal with this and raise an
 *          allocation type specific status code (if desired).
 *
 * @throws  VBox status code if we run into trouble spilling a variable or
 *          recording debug info.  Does NOT throw anything if we're out of
 *          registers, though.
 */
static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
                                             uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
{
    STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
    Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
    Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));

    /*
     * Try a freed register that's shadowing a guest register.
     */
    uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
    if (fRegs)
    {
        STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);

#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
        /*
         * When we have liveness information, we use it to kick out all shadowed
         * guest registers that will not be needed any more in this TB.  If we're
         * lucky, this may prevent us from ending up here again.
         *
         * Note! We must consider the previous entry here so we don't free
         *       anything that the current threaded function requires (current
         *       entry is produced by the next threaded function).
         */
        uint32_t const idxCurCall = pReNative->idxCurCall;
        if (idxCurCall > 0)
        {
            PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];

# ifndef IEMLIVENESS_EXTENDED_LAYOUT
            /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
            AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
            uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
#else
            /* Construct a mask of the registers not in the read or write state.
               Note! We could skip writes, if they aren't from us, as this is just
                     a hack to prevent trashing registers that have just been written
                     or will be written when we retire the current instruction.
*/6035 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm646036 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm646037 & IEMLIVENESSBIT_MASK;6038 #endif6039 /* If it matches any shadowed registers. */6040 if (pReNative->Core.bmGstRegShadows & fToFreeMask)6041 {6042 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);6043 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);6044 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */6045 6046 /* See if we've got any unshadowed registers we can return now. */6047 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;6048 if (fUnshadowedRegs)6049 {6050 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);6051 return (fPreferVolatile6052 ? ASMBitFirstSetU32(fUnshadowedRegs)6053 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK6054 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))6055 - 1;6056 }6057 }6058 }6059 #endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */6060 6061 unsigned const idxReg = (fPreferVolatile6062 ? ASMBitFirstSetU32(fRegs)6063 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK6064 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))6065 - 1;6066 6067 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);6068 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)6069 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);6070 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));6071 6072 /* We need to flush any pending guest register writes this host SIMD register shadows. */6073 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);6074 6075 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);6076 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;6077 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;6078 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;6079 return idxReg;6080 }6081 6082 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */6083 6084 /*6085 * Try free up a variable that's in a register.6086 *6087 * We do two rounds here, first evacuating variables we don't need to be6088 * saved on the stack, then in the second round move things to the stack.6089 */6090 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);6091 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)6092 {6093 uint32_t fVars = pReNative->Core.bmVars;6094 while (fVars)6095 {6096 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;6097 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;6098 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */6099 continue;6100 6101 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)6102 && (RT_BIT_32(idxReg) & fRegMask)6103 && ( iLoop == 06104 ? 
pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack6105 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)6106 && !pReNative->Core.aVars[idxVar].fRegAcquired)6107 {6108 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));6109 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)6110 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);6111 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));6112 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))6113 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));6114 6115 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)6116 {6117 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));6118 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);6119 }6120 6121 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;6122 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);6123 6124 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);6125 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;6126 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;6127 return idxReg;6128 }6129 fVars &= ~RT_BIT_32(idxVar);6130 }6131 }6132 6133 AssertFailed();6134 return UINT8_MAX;6135 }6136 6137 6138 /**6139 * Flushes a set of guest register shadow copies.6140 *6141 * This is usually done after calling a threaded function or a C-implementation6142 * of an instruction.6143 *6144 * @param pReNative The native recompile state.6145 * @param fGstSimdRegs Set of guest SIMD registers to flush.6146 */6147 DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT6148 {6149 /*6150 * Reduce the mask by what's currently shadowed6151 */6152 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;6153 fGstSimdRegs &= bmGstSimdRegShadows;6154 if (fGstSimdRegs)6155 {6156 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;6157 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));6158 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;6159 if (bmGstSimdRegShadowsNew)6160 {6161 /*6162 * Partial.6163 */6164 do6165 {6166 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;6167 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];6168 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));6169 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));6170 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));6171 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));6172 6173 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);6174 fGstSimdRegs &= ~fInThisHstReg;6175 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;6176 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;6177 if (!fGstRegShadowsNew)6178 {6179 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);6180 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;6181 }6182 } while (fGstSimdRegs != 0);6183 }6184 else6185 {6186 
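            /* Note: nothing remains shadowed after this branch, so the loop below only resets the
               per-host-register state; bmHstSimdRegsWithGstShadow is zeroed wholesale once the loop
               is done rather than bit by bit. */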
/*6187 * Clear all.6188 */6189 do6190 {6191 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;6192 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];6193 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));6194 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));6195 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));6196 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));6197 6198 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));6199 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;6200 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;6201 } while (fGstSimdRegs != 0);6202 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;6203 }6204 }6205 }6206 6207 6208 /**6209 * Allocates a temporary host SIMD register.6210 *6211 * This may emit code to save register content onto the stack in order to free6212 * up a register.6213 *6214 * @returns The host register number; throws VBox status code on failure,6215 * so no need to check the return value.6216 * @param pReNative The native recompile state.6217 * @param poff Pointer to the variable with the code buffer position.6218 * This will be update if we need to move a variable from6219 * register to stack in order to satisfy the request.6220 * @param fPreferVolatile Whether to prefer volatile over non-volatile6221 * registers (@c true, default) or the other way around6222 * (@c false, for iemNativeRegAllocTmpForGuestReg()).6223 */6224 DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)6225 {6226 /*6227 * Try find a completely unused register, preferably a call-volatile one.6228 */6229 uint8_t idxSimdReg;6230 uint32_t fRegs = ~pReNative->Core.bmHstRegs6231 & ~pReNative->Core.bmHstRegsWithGstShadow6232 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);6233 if (fRegs)6234 {6235 if (fPreferVolatile)6236 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK6237 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;6238 else6239 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK6240 ? 
fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;6241 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);6242 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));6243 6244 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;6245 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));6246 }6247 else6248 {6249 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);6250 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));6251 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));6252 }6253 6254 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);6255 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);6256 }6257 6258 6259 /**6260 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable6261 * registers.6262 *6263 * @returns The host register number; throws VBox status code on failure,6264 * so no need to check the return value.6265 * @param pReNative The native recompile state.6266 * @param poff Pointer to the variable with the code buffer position.6267 * This will be update if we need to move a variable from6268 * register to stack in order to satisfy the request.6269 * @param fRegMask Mask of acceptable registers.6270 * @param fPreferVolatile Whether to prefer volatile over non-volatile6271 * registers (@c true, default) or the other way around6272 * (@c false, for iemNativeRegAllocTmpForGuestReg()).6273 */6274 DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,6275 bool fPreferVolatile /*= true*/)6276 {6277 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));6278 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));6279 6280 /*6281 * Try find a completely unused register, preferably a call-volatile one.6282 */6283 uint8_t idxSimdReg;6284 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs6285 & ~pReNative->Core.bmHstSimdRegsWithGstShadow6286 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)6287 & fRegMask;6288 if (fRegs)6289 {6290 if (fPreferVolatile)6291 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK6292 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;6293 else6294 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK6295 ? 
fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
        Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
        Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));

        pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
        Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
    }
    else
    {
        idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
        AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
        Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
    }

    Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
    return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
}


/**
 * Sets the indicator for which part of the given SIMD register has valid data loaded.
 *
 * @param   pReNative       The native recompile state.
 * @param   idxHstSimdReg   The host SIMD register to update the state for.
 * @param   enmLoadSz       The load size to set.
 */
DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
                                                         IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
{
    /* Everything valid already? -> nothing to do. */
    if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
        return;

    if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
        pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
    else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
    {
        Assert(   (   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
                   && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
               || (   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
                   && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
        pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
    }
}


static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
                                                            uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
{
    /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
    if (   pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
        || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
    {
# ifdef RT_ARCH_ARM64
        /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
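        /* Presumably the 256-bit guest value is kept in an even/odd pair of host SIMD registers
           (e.g. ymm0 in q0:q1), which is what the evenness asserts below check for; the high-128
           helpers elsewhere in this file operate on "reg + 1" accordingly. */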
        Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
# endif

        if (idxHstSimdRegDst != idxHstSimdRegSrc)
        {
            switch (enmLoadSzDst)
            {
                case kIemNativeGstSimdRegLdStSz_256:
                    off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
                    break;
                case kIemNativeGstSimdRegLdStSz_Low128:
                    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
                    break;
                case kIemNativeGstSimdRegLdStSz_High128:
                    off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
                    break;
                default:
                    AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
            }

            iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
        }
    }
    else
    {
        /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
        Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
        off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
    }

    return off;
}


/**
 * Allocates a temporary host SIMD register for keeping a guest
 * SIMD register value.
 *
 * Since we may already have a register holding the guest register value,
 * code will be emitted to do the loading if that's not the case.  Code may also
 * be emitted if we have to free up a register to satisfy the request.
 *
 * @returns The host register number; throws VBox status code on failure, so no
 *          need to check the return value.
 * @param   pReNative       The native recompile state.
 * @param   poff            Pointer to the variable with the code buffer
 *                          position.  This will be updated if we need to move a
 *                          variable from register to stack in order to satisfy
 *                          the request.
 * @param   enmGstSimdReg   The guest SIMD register that is to be updated.
 * @param   enmIntendedUse  How the caller will be using the host register.
 * @param   fNoVolatileRegs Set if no volatile register allowed, clear if any
 *                          register is okay (default).  The ASSUMPTION here is
 *                          that the caller has already flushed all volatile
 *                          registers, so this is only applied if we allocate a
 *                          new register.
 * @sa      iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
 */
DECL_HIDDEN_THROW(uint8_t)
iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
                                        IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
                                        bool fNoVolatileRegs /*= false*/)
{
    Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
    AssertMsg(   pReNative->idxCurCall == 0
              || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
                  ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
                  : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
                  ?
IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))6420 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),6421 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));6422 #endif6423 #if defined(LOG_ENABLED) || defined(VBOX_STRICT)6424 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };6425 #endif6426 uint32_t const fRegMask = !fNoVolatileRegs6427 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK6428 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;6429 6430 /*6431 * First check if the guest register value is already in a host register.6432 */6433 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))6434 {6435 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];6436 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));6437 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));6438 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));6439 6440 /* It's not supposed to be allocated... */6441 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))6442 {6443 /*6444 * If the register will trash the guest shadow copy, try find a6445 * completely unused register we can use instead. If that fails,6446 * we need to disassociate the host reg from the guest reg.6447 */6448 /** @todo would be nice to know if preserving the register is in any way helpful. */6449 /* If the purpose is calculations, try duplicate the register value as6450 we'll be clobbering the shadow. */6451 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation6452 && ( ~pReNative->Core.bmHstSimdRegs6453 & ~pReNative->Core.bmHstSimdRegsWithGstShadow6454 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))6455 {6456 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);6457 6458 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);6459 6460 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",6461 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,6462 g_apszIemNativeHstSimdRegNames[idxRegNew]));6463 idxSimdReg = idxRegNew;6464 }6465 /* If the current register matches the restrictions, go ahead and allocate6466 it for the caller. 
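           E.g. when the caller passed fNoVolatileRegs, fRegMask (set up above) excludes
           IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, so a shadow sitting in a call-volatile host
           register fails this test and is handled by the transfer branch further down instead --
           at least that is how this reads.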
*/6467 else if (fRegMask & RT_BIT_32(idxSimdReg))6468 {6469 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);6470 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;6471 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)6472 {6473 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)6474 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);6475 else6476 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);6477 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",6478 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));6479 }6480 else6481 {6482 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);6483 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",6484 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));6485 }6486 }6487 /* Otherwise, allocate a register that satisfies the caller and transfer6488 the shadowing if compatible with the intended use. (This basically6489 means the call wants a non-volatile register (RSP push/pop scenario).) */6490 else6491 {6492 Assert(fNoVolatileRegs);6493 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),6494 !fNoVolatileRegs6495 && enmIntendedUse == kIemNativeGstRegUse_Calculation);6496 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);6497 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)6498 {6499 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);6500 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transfering %s to %s for guest %s %s\n",6501 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],6502 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));6503 }6504 else6505 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",6506 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,6507 g_apszIemNativeHstSimdRegNames[idxRegNew]));6508 idxSimdReg = idxRegNew;6509 }6510 }6511 else6512 {6513 /*6514 * Oops. Shadowed guest register already allocated!6515 *6516 * Allocate a new register, copy the value and, if updating, the6517 * guest shadow copy assignment to the new register.6518 */6519 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate6520 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,6521 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",6522 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));6523 6524 /** @todo share register for readonly access. 
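             *        (Reading of the current code: for pure read-only use the value could
             *        presumably be fetched from the existing, already-allocated shadow register
             *        instead of being duplicated into the fresh register allocated below; that
             *        sharing is not implemented here.)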
*/6525 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,6526 enmIntendedUse == kIemNativeGstRegUse_Calculation);6527 6528 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)6529 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);6530 else6531 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);6532 6533 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate6534 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)6535 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",6536 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,6537 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));6538 else6539 {6540 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);6541 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",6542 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,6543 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));6544 }6545 idxSimdReg = idxRegNew;6546 }6547 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */6548 6549 #ifdef VBOX_STRICT6550 /* Strict builds: Check that the value is correct. */6551 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)6552 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);6553 #endif6554 6555 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite6556 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)6557 {6558 # if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)6559 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);6560 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);6561 # endif6562 6563 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)6564 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);6565 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)6566 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);6567 else6568 {6569 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);6570 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);6571 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);6572 }6573 }6574 6575 return idxSimdReg;6576 }6577 6578 /*6579 * Allocate a new register, load it with the guest value and designate it as a copy of the6580 */6581 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);6582 6583 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)6584 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);6585 else6586 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);6587 6588 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)6589 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);6590 6591 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite6592 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)6593 {6594 # if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)6595 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);6596 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, 
idxRegNew);6597 # endif6598 6599 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)6600 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);6601 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)6602 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);6603 else6604 {6605 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);6606 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);6607 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);6608 }6609 }6610 6611 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",6612 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));6613 6614 return idxRegNew;6615 }6616 6617 6618 /**6619 * Flushes guest SIMD register shadow copies held by a set of host registers.6620 *6621 * This is used whenever calling an external helper for ensuring that we don't carry on6622 * with any guest shadows in volatile registers, as these will get corrupted by the caller.6623 *6624 * @param pReNative The native recompile state.6625 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.6626 */6627 DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT6628 {6629 /*6630 * Reduce the mask by what's currently shadowed.6631 */6632 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;6633 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;6634 if (fHstSimdRegs)6635 {6636 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;6637 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",6638 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));6639 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;6640 if (bmHstSimdRegsWithGstShadowNew)6641 {6642 /*6643 * Partial (likely).6644 */6645 uint64_t fGstShadows = 0;6646 do6647 {6648 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;6649 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));6650 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)6651 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);6652 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)6653 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));6654 6655 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;6656 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;6657 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);6658 } while (fHstSimdRegs != 0);6659 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;6660 }6661 else6662 {6663 /*6664 * Clear all.6665 */6666 do6667 {6668 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;6669 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));6670 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)6671 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);6672 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)6673 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));6674 6675 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;6676 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);6677 } 
while (fHstSimdRegs != 0);6678 pReNative->Core.bmGstSimdRegShadows = 0;6679 }6680 }6681 }6682 #endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */6683 6684 6685 6686 /*********************************************************************************************************************************6687 * Code emitters for flushing pending guest register writes and sanity checks *6688 *********************************************************************************************************************************/6689 6690 #ifdef VBOX_STRICT6691 /**6692 * Does internal register allocator sanity checks.6693 */6694 DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)6695 {6696 /*6697 * Iterate host registers building a guest shadowing set.6698 */6699 uint64_t bmGstRegShadows = 0;6700 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;6701 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));6702 while (bmHstRegsWithGstShadow)6703 {6704 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;6705 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));6706 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);6707 6708 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;6709 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));6710 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));6711 bmGstRegShadows |= fThisGstRegShadows;6712 while (fThisGstRegShadows)6713 {6714 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;6715 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);6716 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,6717 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",6718 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));6719 }6720 }6721 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,6722 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,6723 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));6724 6725 /*6726 * Now the other way around, checking the guest to host index array.6727 */6728 bmHstRegsWithGstShadow = 0;6729 bmGstRegShadows = pReNative->Core.bmGstRegShadows;6730 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));6731 while (bmGstRegShadows)6732 {6733 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;6734 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));6735 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);6736 6737 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];6738 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));6739 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),6740 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",6741 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));6742 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);6743 }6744 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,6745 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,6746 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));6747 }6748 #endif /* VBOX_STRICT */6749 6750 6751 /**6752 * Flushes any delayed guest register writes.6753 *6754 * This must be called prior to calling CImpl functions and any helpers that use6755 * the guest state 
(like raising exceptions) and such.6756 *6757 * @note This function does not flush any shadowing information for guest registers. This needs to be done by6758 * the caller if it wishes to do so.6759 */6760 DECL_HIDDEN_THROW(uint32_t)6761 iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)6762 {6763 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING6764 if (!(fGstShwExcept & kIemNativeGstReg_Pc))6765 off = iemNativeEmitPcWriteback(pReNative, off);6766 #else6767 RT_NOREF(pReNative, fGstShwExcept);6768 #endif6769 6770 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK6771 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);6772 #endif6773 6774 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR6775 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);6776 #endif6777 6778 return off;6779 }6780 6781 6782 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING6783 /**6784 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.6785 */6786 DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)6787 {6788 Assert(pReNative->Core.offPc);6789 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO6790 iemNativeDbgInfoAddNativeOffset(pReNative, off);6791 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);6792 # endif6793 6794 # ifndef IEMNATIVE_REG_FIXED_PC_DBG6795 /* Allocate a temporary PC register. */6796 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);6797 6798 /* Perform the addition and store the result. */6799 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);6800 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));6801 6802 /* Free but don't flush the PC register. */6803 iemNativeRegFreeTmp(pReNative, idxPcReg);6804 # else6805 /* Compare the shadow with the context value, they should match. 
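       (Debug-only variant with IEMNATIVE_REG_FIXED_PC_DBG: the accumulated delta in Core.offPc is
       added to the fixed debug PC register and iemNativeEmitGuestRegValueCheck breaks if it no
       longer matches cpum.GstCtx.rip.  Purely illustrative figures, not from the source: after
       skipping the updates for a 3-byte and a 2-byte instruction, offPc would be 5 at this point.)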
*/6806 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);6807 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);6808 # endif6809 6810 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);6811 pReNative->Core.offPc = 0;6812 pReNative->Core.cInstrPcUpdateSkipped = 0;6813 6814 return off;6815 }6816 #endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */6817 6818 6819 /*********************************************************************************************************************************6820 * Code Emitters (larger snippets) *6821 *********************************************************************************************************************************/6822 6823 /**6824 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero6825 * extending to 64-bit width.6826 *6827 * @returns New code buffer offset on success, UINT32_MAX on failure.6828 * @param pReNative .6829 * @param off The current code buffer position.6830 * @param idxHstReg The host register to load the guest register value into.6831 * @param enmGstReg The guest register to load.6832 *6833 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,6834 * that is something the caller needs to do if applicable.6835 */6836 DECL_HIDDEN_THROW(uint32_t)6837 iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)6838 {6839 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);6840 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);6841 6842 switch (g_aGstShadowInfo[enmGstReg].cb)6843 {6844 case sizeof(uint64_t):6845 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);6846 case sizeof(uint32_t):6847 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);6848 case sizeof(uint16_t):6849 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);6850 #if 0 /* not present in the table. 
*/6851 case sizeof(uint8_t):6852 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);6853 #endif6854 default:6855 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));6856 }6857 }6858 6859 6860 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR6861 /**6862 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.6863 *6864 * @returns New code buffer offset on success, UINT32_MAX on failure.6865 * @param pReNative The recompiler state.6866 * @param off The current code buffer position.6867 * @param idxHstSimdReg The host register to load the guest register value into.6868 * @param enmGstSimdReg The guest register to load.6869 * @param enmLoadSz The load size of the register.6870 *6871 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,6872 * that is something the caller needs to do if applicable.6873 */6874 DECL_HIDDEN_THROW(uint32_t)6875 iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,6876 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)6877 {6878 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));6879 6880 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);6881 switch (enmLoadSz)6882 {6883 case kIemNativeGstSimdRegLdStSz_256:6884 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);6885 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);6886 case kIemNativeGstSimdRegLdStSz_Low128:6887 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);6888 case kIemNativeGstSimdRegLdStSz_High128:6889 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);6890 default:6891 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));6892 }6893 }6894 #endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */6895 6896 #ifdef VBOX_STRICT6897 6898 /**6899 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.6900 *6901 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.6902 * Trashes EFLAGS on AMD64.6903 */6904 DECL_HIDDEN_THROW(uint32_t)6905 iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)6906 {6907 # ifdef RT_ARCH_AMD646908 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);6909 6910 /* rol reg64, 32 */6911 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);6912 pbCodeBuf[off++] = 0xc1;6913 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);6914 pbCodeBuf[off++] = 32;6915 6916 /* test reg32, ffffffffh */6917 if (idxReg >= 8)6918 pbCodeBuf[off++] = X86_OP_REX_B;6919 pbCodeBuf[off++] = 0xf7;6920 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);6921 pbCodeBuf[off++] = 0xff;6922 pbCodeBuf[off++] = 0xff;6923 pbCodeBuf[off++] = 0xff;6924 pbCodeBuf[off++] = 0xff;6925 6926 /* je/jz +1 */6927 pbCodeBuf[off++] = 0x74;6928 pbCodeBuf[off++] = 0x01;6929 6930 /* int3 */6931 pbCodeBuf[off++] = 0xcc;6932 6933 /* rol reg64, 32 */6934 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 
0 : X86_OP_REX_B);6935 pbCodeBuf[off++] = 0xc1;6936 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);6937 pbCodeBuf[off++] = 32;6938 6939 # elif defined(RT_ARCH_ARM64)6940 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);6941 /* lsr tmp0, reg64, #32 */6942 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);6943 /* cbz tmp0, +1 */6944 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);6945 /* brk #0x1100 */6946 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));6947 6948 # else6949 # error "Port me!"6950 # endif6951 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);6952 return off;6953 }6954 6955 6956 /**6957 * Emitting code that checks that the content of register @a idxReg is the same6958 * as what's in the guest register @a enmGstReg, resulting in a breakpoint6959 * instruction if that's not the case.6960 *6961 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.6962 * Trashes EFLAGS on AMD64.6963 */6964 DECL_HIDDEN_THROW(uint32_t)6965 iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)6966 {6967 #if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)6968 /* We can't check the value against whats in CPUMCTX if the register is already marked as dirty, so skip the check. */6969 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))6970 return off;6971 #endif6972 6973 # ifdef RT_ARCH_AMD646974 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);6975 6976 /* cmp reg, [mem] */6977 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))6978 {6979 if (idxReg >= 8)6980 pbCodeBuf[off++] = X86_OP_REX_R;6981 pbCodeBuf[off++] = 0x38;6982 }6983 else6984 {6985 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))6986 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);6987 else6988 {6989 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))6990 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;6991 else6992 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),6993 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));6994 if (idxReg >= 8)6995 pbCodeBuf[off++] = X86_OP_REX_R;6996 }6997 pbCodeBuf[off++] = 0x39;6998 }6999 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);7000 7001 /* je/jz +1 */7002 pbCodeBuf[off++] = 0x74;7003 pbCodeBuf[off++] = 0x01;7004 7005 /* int3 */7006 pbCodeBuf[off++] = 0xcc;7007 7008 /* For values smaller than the register size, we must check that the rest7009 of the register is all zeros. */7010 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))7011 {7012 /* test reg64, imm32 */7013 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);7014 pbCodeBuf[off++] = 0xf7;7015 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);7016 pbCodeBuf[off++] = 0;7017 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 
0 : 0xff;7018 pbCodeBuf[off++] = 0xff;7019 pbCodeBuf[off++] = 0xff;7020 7021 /* je/jz +1 */7022 pbCodeBuf[off++] = 0x74;7023 pbCodeBuf[off++] = 0x01;7024 7025 /* int3 */7026 pbCodeBuf[off++] = 0xcc;7027 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);7028 }7029 else7030 {7031 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);7032 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))7033 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);7034 }7035 7036 # elif defined(RT_ARCH_ARM64)7037 /* mov TMP0, [gstreg] */7038 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);7039 7040 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);7041 /* sub tmp0, tmp0, idxReg */7042 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);7043 /* cbz tmp0, +1 */7044 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);7045 /* brk #0x1000+enmGstReg */7046 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));7047 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);7048 7049 # else7050 # error "Port me!"7051 # endif7052 return off;7053 }7054 7055 7056 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR7057 # ifdef RT_ARCH_AMD647058 /**7059 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.7060 */7061 DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)7062 {7063 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */7064 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;7065 if (idxSimdReg >= 8)7066 pbCodeBuf[off++] = X86_OP_REX_R;7067 pbCodeBuf[off++] = 0x0f;7068 pbCodeBuf[off++] = 0x38;7069 pbCodeBuf[off++] = 0x29;7070 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);7071 7072 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */7073 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;7074 pbCodeBuf[off++] = X86_OP_REX_W7075 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)7076 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);7077 pbCodeBuf[off++] = 0x0f;7078 pbCodeBuf[off++] = 0x3a;7079 pbCodeBuf[off++] = 0x16;7080 pbCodeBuf[off++] = 0xeb;7081 pbCodeBuf[off++] = 0x00;7082 7083 /* cmp tmp0, 0xffffffffffffffff. */7084 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);7085 pbCodeBuf[off++] = 0x83;7086 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);7087 pbCodeBuf[off++] = 0xff;7088 7089 /* je/jz +1 */7090 pbCodeBuf[off++] = 0x74;7091 pbCodeBuf[off++] = 0x01;7092 7093 /* int3 */7094 pbCodeBuf[off++] = 0xcc;7095 7096 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */7097 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;7098 pbCodeBuf[off++] = X86_OP_REX_W7099 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)7100 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);7101 pbCodeBuf[off++] = 0x0f;7102 pbCodeBuf[off++] = 0x3a;7103 pbCodeBuf[off++] = 0x16;7104 pbCodeBuf[off++] = 0xeb;7105 pbCodeBuf[off++] = 0x01;7106 7107 /* cmp tmp0, 0xffffffffffffffff. */7108 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 
0 : X86_OP_REX_B);7109 pbCodeBuf[off++] = 0x83;7110 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);7111 pbCodeBuf[off++] = 0xff;7112 7113 /* je/jz +1 */7114 pbCodeBuf[off++] = 0x74;7115 pbCodeBuf[off++] = 0x01;7116 7117 /* int3 */7118 pbCodeBuf[off++] = 0xcc;7119 7120 return off;7121 }7122 # endif7123 7124 7125 /**7126 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same7127 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint7128 * instruction if that's not the case.7129 *7130 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.7131 * Trashes EFLAGS on AMD64.7132 */7133 DECL_HIDDEN_THROW(uint32_t)7134 iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,7135 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)7136 {7137 /* We can't check the value against whats in CPUMCTX if the register is already marked as dirty, so skip the check. */7138 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_2567139 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)7140 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))7141 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low1287142 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))7143 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High1287144 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))7145 return off;7146 7147 # ifdef RT_ARCH_AMD647148 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)7149 {7150 /* movdqa vectmp0, idxSimdReg */7151 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);7152 7153 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);7154 7155 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,7156 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);7157 }7158 7159 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)7160 {7161 /* Due to the fact that CPUMCTX stores the high 128-bit separately we need to do this all over again for the high part. */7162 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);7163 7164 /* vextracti128 vectmp0, idxSimdReg, 1 */7165 pbCodeBuf[off++] = X86_OP_VEX3;7166 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)7167 | X86_OP_VEX3_BYTE1_X7168 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 
X86_OP_VEX3_BYTE1_B : 0)7169 | 0x03; /* Opcode map */7170 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);7171 pbCodeBuf[off++] = 0x39;7172 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);7173 pbCodeBuf[off++] = 0x01;7174 7175 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,7176 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);7177 }7178 # elif defined(RT_ARCH_ARM64)7179 /* mov vectmp0, [gstreg] */7180 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);7181 7182 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)7183 {7184 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);7185 /* eor vectmp0, vectmp0, idxSimdReg */7186 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);7187 /* uaddlv vectmp0, vectmp0.16B */7188 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);7189 /* umov tmp0, vectmp0.H[0] */7190 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,7191 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);7192 /* cbz tmp0, +1 */7193 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);7194 /* brk #0x1000+enmGstReg */7195 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));7196 }7197 7198 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)7199 {7200 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);7201 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */7202 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);7203 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */7204 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);7205 /* umov tmp0, (vectmp0 + 1).H[0] */7206 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,7207 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);7208 /* cbz tmp0, +1 */7209 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);7210 /* brk #0x1000+enmGstReg */7211 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));7212 }7213 7214 # else7215 # error "Port me!"7216 # endif7217 7218 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);7219 return off;7220 }7221 # endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */7222 7223 7224 /**7225 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all7226 * important bits.7227 *7228 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.7229 * Trashes EFLAGS on AMD64.7230 */7231 DECL_HIDDEN_THROW(uint32_t)7232 iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)7233 {7234 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);7235 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));7236 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);7237 off = 
iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);7238 7239 #ifdef RT_ARCH_AMD647240 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);7241 7242 /* je/jz +1 */7243 pbCodeBuf[off++] = 0x74;7244 pbCodeBuf[off++] = 0x01;7245 7246 /* int3 */7247 pbCodeBuf[off++] = 0xcc;7248 7249 # elif defined(RT_ARCH_ARM64)7250 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);7251 7252 /* b.eq +1 */7253 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);7254 /* brk #0x2000 */7255 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));7256 7257 # else7258 # error "Port me!"7259 # endif7260 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);7261 7262 iemNativeRegFreeTmp(pReNative, idxRegTmp);7263 return off;7264 }7265 7266 #endif /* VBOX_STRICT */7267 7268 7269 #ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING7270 /**7271 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.7272 */7273 DECL_HIDDEN_THROW(uint32_t)7274 iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)7275 {7276 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);7277 7278 fEflNeeded &= X86_EFL_STATUS_BITS;7279 if (fEflNeeded)7280 {7281 # ifdef RT_ARCH_AMD647282 /* test dword [pVCpu + offVCpu], imm32 */7283 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);7284 if (fEflNeeded <= 0xff)7285 {7286 pCodeBuf[off++] = 0xf6;7287 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);7288 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);7289 }7290 else7291 {7292 pCodeBuf[off++] = 0xf7;7293 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);7294 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);7295 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);7296 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);7297 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);7298 }7299 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);7300 7301 # else7302 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);7303 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);7304 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);7305 # ifdef RT_ARCH_ARM647306 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);7307 off = iemNativeEmitBrk(pReNative, off, 0x7777);7308 # else7309 # error "Port me!"7310 # endif7311 iemNativeRegFreeTmp(pReNative, idxRegTmp);7312 # endif7313 }7314 return off;7315 }7316 #endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */7317 7318 7319 /**7320 * Emits a code for checking the return code of a call and rcPassUp, returning7321 * from the code if either are non-zero.7322 */7323 DECL_HIDDEN_THROW(uint32_t)7324 iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)7325 {7326 #ifdef RT_ARCH_AMD647327 /*7328 * AMD64: eax = call status code.7329 */7330 7331 /* edx = rcPassUp */7332 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));7333 # ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING7334 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);7335 # endif7336 7337 /* edx = eax | rcPassUp */7338 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);7339 pbCodeBuf[off++] = 0x0b; /* or edx, eax */7340 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);7341 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);7342 7343 /* Jump to non-zero status return path. 
*/7344 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);7345 7346 /* done. */7347 7348 #elif RT_ARCH_ARM647349 /*7350 * ARM64: w0 = call status code.7351 */7352 # ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING7353 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);7354 # endif7355 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));7356 7357 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);7358 7359 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);7360 7361 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);7362 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);7363 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);7364 7365 #else7366 # error "port me"7367 #endif7368 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);7369 RT_NOREF_PV(idxInstr);7370 return off;7371 }7372 7373 7374 /**7375 * Emits code to check if the content of @a idxAddrReg is a canonical address,7376 * raising a \#GP(0) if it isn't.7377 *7378 * @returns New code buffer offset, UINT32_MAX on failure.7379 * @param pReNative The native recompile state.7380 * @param off The code buffer offset.7381 * @param idxAddrReg The host register with the address to check.7382 * @param idxInstr The current instruction.7383 */7384 DECL_HIDDEN_THROW(uint32_t)7385 iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)7386 {7387 /*7388 * Make sure we don't have any outstanding guest register writes as we may7389 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.7390 */7391 off = iemNativeRegFlushPendingWrites(pReNative, off);7392 7393 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING7394 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));7395 #else7396 RT_NOREF(idxInstr);7397 #endif7398 7399 #ifdef RT_ARCH_AMD647400 /*7401 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)7402 * return raisexcpt();7403 * ---- this wariant avoid loading a 64-bit immediate, but is an instruction longer.7404 */7405 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);7406 7407 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);7408 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);7409 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);7410 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);7411 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);7412 7413 iemNativeRegFreeTmp(pReNative, iTmpReg);7414 7415 #elif defined(RT_ARCH_ARM64)7416 /*7417 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)7418 * return raisexcpt();7419 * ----7420 * mov x1, 0x8000000000007421 * add x1, x0, x17422 * cmp xzr, x1, lsr 487423 * b.ne .Lraisexcpt7424 */7425 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);7426 7427 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));7428 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);7429 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);7430 off = iemNativeEmitJnzToNewLabel(pReNative, off, 
kIemNativeLabelType_RaiseGp0);7431 7432 iemNativeRegFreeTmp(pReNative, iTmpReg);7433 7434 #else7435 # error "Port me"7436 #endif7437 return off;7438 }7439 7440 7441 /**7442 * Emits code to check if that the content of @a idxAddrReg is within the limit7443 * of CS, raising a \#GP(0) if it isn't.7444 *7445 * @returns New code buffer offset; throws VBox status code on error.7446 * @param pReNative The native recompile state.7447 * @param off The code buffer offset.7448 * @param idxAddrReg The host register (32-bit) with the address to7449 * check.7450 * @param idxInstr The current instruction.7451 */7452 DECL_HIDDEN_THROW(uint32_t)7453 iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,7454 uint8_t idxAddrReg, uint8_t idxInstr)7455 {7456 /*7457 * Make sure we don't have any outstanding guest register writes as we may7458 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.7459 */7460 off = iemNativeRegFlushPendingWrites(pReNative, off);7461 7462 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING7463 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));7464 #else7465 RT_NOREF(idxInstr);7466 #endif7467 7468 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,7469 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),7470 kIemNativeGstRegUse_ReadOnly);7471 7472 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);7473 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);7474 7475 iemNativeRegFreeTmp(pReNative, idxRegCsLim);7476 return off;7477 }7478 7479 7480 /**7481 * Emits a call to a CImpl function or something similar.7482 */7483 DECL_HIDDEN_THROW(uint32_t)7484 iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,7485 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)7486 {7487 /* Writeback everything. */7488 off = iemNativeRegFlushPendingWrites(pReNative, off);7489 7490 /*7491 * Flush stuff. PC and EFlags are implictly flushed, the latter because we7492 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.7493 */7494 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,7495 fGstShwFlush7496 | RT_BIT_64(kIemNativeGstReg_Pc)7497 | RT_BIT_64(kIemNativeGstReg_EFlags));7498 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);7499 7500 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);7501 7502 /*7503 * Load the parameters.7504 */7505 #if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)7506 /* Special code the hidden VBOXSTRICTRC pointer. 
*/7507 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);7508 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */7509 if (cAddParams > 0)7510 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);7511 if (cAddParams > 1)7512 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);7513 if (cAddParams > 2)7514 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);7515 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */7516 7517 #else7518 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);7519 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);7520 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */7521 if (cAddParams > 0)7522 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);7523 if (cAddParams > 1)7524 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);7525 if (cAddParams > 2)7526 # if IEMNATIVE_CALL_ARG_GREG_COUNT >= 57527 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);7528 # else7529 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);7530 # endif7531 #endif7532 7533 /*7534 * Make the call.7535 */7536 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);7537 7538 #if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)7539 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */7540 #endif7541 7542 /*7543 * Check the status code.7544 */7545 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);7546 }7547 7548 7549 /**7550 * Emits a call to a threaded worker function.7551 */7552 DECL_HIDDEN_THROW(uint32_t)7553 iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)7554 {7555 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);7556 7557 /* We don't know what the threaded function is doing so we must flush all pending writes. */7558 off = iemNativeRegFlushPendingWrites(pReNative, off);7559 7560 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */7561 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);7562 7563 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING7564 /* The threaded function may throw / long jmp, so set current instruction7565 number if we're counting. */7566 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));7567 #endif7568 7569 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];7570 7571 #ifdef RT_ARCH_AMD647572 /* Load the parameters and emit the call. 
*/7573 # ifdef RT_OS_WINDOWS7574 # ifndef VBOXSTRICTRC_STRICT_ENABLED7575 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);7576 if (cParams > 0)7577 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);7578 if (cParams > 1)7579 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);7580 if (cParams > 2)7581 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);7582 # else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */7583 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);7584 if (cParams > 0)7585 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);7586 if (cParams > 1)7587 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);7588 if (cParams > 2)7589 {7590 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);7591 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);7592 }7593 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */7594 # endif /* VBOXSTRICTRC_STRICT_ENABLED */7595 # else7596 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);7597 if (cParams > 0)7598 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);7599 if (cParams > 1)7600 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);7601 if (cParams > 2)7602 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);7603 # endif7604 7605 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);7606 7607 # if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)7608 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */7609 # endif7610 7611 #elif RT_ARCH_ARM647612 /*7613 * ARM64:7614 */7615 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);7616 if (cParams > 0)7617 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);7618 if (cParams > 1)7619 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);7620 if (cParams > 2)7621 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);7622 7623 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);7624 7625 #else7626 # error "port me"7627 #endif7628 7629 /*7630 * Check the status code.7631 */7632 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);7633 7634 return off;7635 }7636 7637 #ifdef VBOX_WITH_STATISTICS7638 /**7639 * Emits code to update the thread call statistics.7640 */7641 DECL_INLINE_THROW(uint32_t)7642 iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)7643 {7644 /*7645 * Update threaded function stats.7646 */7647 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);7648 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));7649 # if defined(RT_ARCH_ARM64)7650 uint8_t const idxTmp1 = 
iemNativeRegAllocTmp(pReNative, &off);7651 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);7652 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);7653 iemNativeRegFreeTmp(pReNative, idxTmp1);7654 iemNativeRegFreeTmp(pReNative, idxTmp2);7655 # else7656 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);7657 # endif7658 return off;7659 }7660 #endif /* VBOX_WITH_STATISTICS */7661 7662 7663 /**7664 * Emits the code at the ReturnWithFlags label (returns7665 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).7666 */7667 static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)7668 {7669 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);7670 if (idxLabel != UINT32_MAX)7671 {7672 iemNativeLabelDefine(pReNative, idxLabel, off);7673 7674 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);7675 7676 /* jump back to the return sequence. */7677 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);7678 }7679 return off;7680 }7681 7682 7683 /**7684 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).7685 */7686 static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)7687 {7688 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);7689 if (idxLabel != UINT32_MAX)7690 {7691 iemNativeLabelDefine(pReNative, idxLabel, off);7692 7693 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);7694 7695 /* jump back to the return sequence. */7696 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);7697 }7698 return off;7699 }7700 7701 7702 /**7703 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.7704 */7705 static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)7706 {7707 /*7708 * Generate the rc + rcPassUp fiddling code if needed.7709 */7710 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);7711 if (idxLabel != UINT32_MAX)7712 {7713 iemNativeLabelDefine(pReNative, idxLabel, off);7714 7715 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */7716 #ifdef RT_ARCH_AMD647717 # ifdef RT_OS_WINDOWS7718 # ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING7719 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */7720 # endif7721 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);7722 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);7723 # else7724 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);7725 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);7726 # ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING7727 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */7728 # endif7729 # endif7730 # ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING7731 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);7732 # endif7733 7734 #else7735 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);7736 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);7737 /* 
IEMNATIVE_CALL_ARG2_GREG is already set. */7738 #endif7739 7740 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);7741 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);7742 }7743 return off;7744 }7745 7746 7747 /**7748 * Emits a standard epilog.7749 */7750 static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)7751 {7752 *pidxReturnLabel = UINT32_MAX;7753 7754 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */7755 off = iemNativeRegFlushPendingWrites(pReNative, off);7756 7757 /*7758 * Successful return, so clear the return register (eax, w0).7759 */7760 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);7761 7762 /*7763 * Define label for common return point.7764 */7765 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);7766 *pidxReturnLabel = idxReturn;7767 7768 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);7769 7770 /*7771 * Restore registers and return.7772 */7773 #ifdef RT_ARCH_AMD647774 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);7775 7776 /* Reposition esp at the r15 restore point. */7777 pbCodeBuf[off++] = X86_OP_REX_W;7778 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */7779 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);7780 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;7781 7782 /* Pop non-volatile registers and return */7783 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */7784 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;7785 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */7786 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;7787 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */7788 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;7789 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */7790 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;7791 # ifdef RT_OS_WINDOWS7792 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */7793 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */7794 # endif7795 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */7796 pbCodeBuf[off++] = 0xc9; /* leave */7797 pbCodeBuf[off++] = 0xc3; /* ret */7798 pbCodeBuf[off++] = 0xcc; /* int3 poison */7799 7800 #elif RT_ARCH_ARM647801 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);7802 7803 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */7804 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);7805 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,7806 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,7807 IEMNATIVE_FRAME_VAR_SIZE / 8);7808 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). 
*/7809 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,7810 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);7811 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,7812 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);7813 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,7814 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);7815 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,7816 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);7817 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,7818 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);7819 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);7820 7821 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */7822 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);7823 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,7824 IEMNATIVE_FRAME_SAVE_REG_SIZE);7825 7826 /* retab / ret */7827 # ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */7828 if (1)7829 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;7830 else7831 # endif7832 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;7833 7834 #else7835 # error "port me"7836 #endif7837 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);7838 7839 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);7840 }7841 7842 7843 /**7844 * Emits a standard prolog.7845 */7846 static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)7847 {7848 #ifdef RT_ARCH_AMD647849 /*7850 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,7851 * reserving 64 bytes for stack variables plus 4 non-register argument7852 * slots. Fixed register assignment: xBX = pReNative;7853 *7854 * Since we always do the same register spilling, we can use the same7855 * unwind description for all the code.7856 */7857 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);7858 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */7859 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */7860 pbCodeBuf[off++] = 0x8b;7861 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);7862 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */7863 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);7864 # ifdef RT_OS_WINDOWS7865 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */7866 pbCodeBuf[off++] = 0x8b;7867 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);7868 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */7869 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */7870 # else7871 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */7872 pbCodeBuf[off++] = 0x8b;7873 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);7874 # endif7875 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */7876 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;7877 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */7878 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;7879 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */7880 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;7881 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */7882 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;7883 7884 # ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP7885 /* Save the frame pointer. 
*/7886 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));7887 # endif7888 7889 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */7890 X86_GREG_xSP,7891 IEMNATIVE_FRAME_ALIGN_SIZE7892 + IEMNATIVE_FRAME_VAR_SIZE7893 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 87894 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);7895 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));7896 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));7897 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));7898 7899 #elif RT_ARCH_ARM647900 /*7901 * We set up a stack frame exactly like on x86, only we have to push the7902 * return address our selves here. We save all non-volatile registers.7903 */7904 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);7905 7906 # ifdef RT_OS_DARWIN /** @todo This seems to be requirement by libunwind for JIT FDEs. Investigate further as been unable7907 * to figure out where the BRK following AUTHB*+XPACB* stuff comes from in libunwind. It's7908 * definitely the dwarf stepping code, but till found it's very tedious to figure out whether it's7909 * in any way conditional, so just emitting this instructions now and hoping for the best... */7910 /* pacibsp */7911 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;7912 # endif7913 7914 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */7915 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);7916 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,7917 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,7918 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);7919 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */7920 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,7921 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);7922 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,7923 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);7924 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,7925 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);7926 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,7927 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);7928 /* Save the BP and LR (ret address) registers at the top of the frame. */7929 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,7930 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);7931 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);7932 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */7933 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,7934 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);7935 7936 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. 
*/7937 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);7938 7939 /* mov r28, r0 */7940 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);7941 /* mov r27, r1 */7942 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);7943 7944 # ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP7945 /* Save the frame pointer. */7946 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),7947 ARMV8_A64_REG_X2);7948 # endif7949 7950 #else7951 # error "port me"7952 #endif7953 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);7954 return off;7955 }7956 7957 7958 /*********************************************************************************************************************************7959 * Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *7960 *********************************************************************************************************************************/7961 7962 /**7963 * Internal work that allocates a variable with kind set to7964 * kIemNativeVarKind_Invalid and no current stack allocation.7965 *7966 * The kind will either be set by the caller or later when the variable is first7967 * assigned a value.7968 *7969 * @returns Unpacked index.7970 * @internal7971 */7972 static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)7973 {7974 Assert(cbType > 0 && cbType <= 64);7975 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;7976 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));7977 pReNative->Core.bmVars |= RT_BIT_32(idxVar);7978 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;7979 pReNative->Core.aVars[idxVar].cbVar = cbType;7980 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;7981 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;7982 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;7983 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;7984 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;7985 pReNative->Core.aVars[idxVar].fRegAcquired = false;7986 pReNative->Core.aVars[idxVar].u.uValue = 0;7987 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR7988 pReNative->Core.aVars[idxVar].fSimdReg = false;7989 #endif7990 return idxVar;7991 }7992 7993 7994 /**7995 * Internal work that allocates an argument variable w/o setting enmKind.7996 *7997 * @returns Unpacked index.7998 * @internal7999 */8000 static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)8001 {8002 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);8003 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));8004 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));8005 8006 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);8007 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */8008 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;8009 return idxVar;8010 }8011 8012 8013 /**8014 * Gets the stack slot for a stack variable, allocating one if necessary.8015 *8016 * Calling this function implies that the stack slot will contain a valid8017 * variable value. 
The caller deals with any register currently assigned to the8018 * variable, typically by spilling it into the stack slot.8019 *8020 * @returns The stack slot number.8021 * @param pReNative The recompiler state.8022 * @param idxVar The variable.8023 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS8024 */8025 DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)8026 {8027 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);8028 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];8029 Assert(pVar->enmKind == kIemNativeVarKind_Stack);8030 8031 /* Already got a slot? */8032 uint8_t const idxStackSlot = pVar->idxStackSlot;8033 if (idxStackSlot != UINT8_MAX)8034 {8035 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);8036 return idxStackSlot;8037 }8038 8039 /*8040 * A single slot is easy to allocate.8041 * Allocate them from the top end, closest to BP, to reduce the displacement.8042 */8043 if (pVar->cbVar <= sizeof(uint64_t))8044 {8045 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;8046 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));8047 pReNative->Core.bmStack |= RT_BIT_32(iSlot);8048 pVar->idxStackSlot = (uint8_t)iSlot;8049 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));8050 return (uint8_t)iSlot;8051 }8052 8053 /*8054 * We need more than one stack slot.8055 *8056 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;8057 */8058 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */8059 Assert(pVar->cbVar <= 64);8060 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;8061 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;8062 uint32_t bmStack = pReNative->Core.bmStack;8063 while (bmStack != UINT32_MAX)8064 {8065 unsigned iSlot = ASMBitLastSetU32(~bmStack);8066 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));8067 iSlot = (iSlot - 1) & ~fBitAlignMask;8068 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)8069 {8070 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);8071 pVar->idxStackSlot = (uint8_t)iSlot;8072 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",8073 idxVar, iSlot, fBitAllocMask, pVar->cbVar));8074 return (uint8_t)iSlot;8075 }8076 8077 bmStack |= (fBitAllocMask << iSlot);8078 }8079 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));8080 }8081 8082 8083 /**8084 * Changes the variable to a stack variable.8085 *8086 * Currently this is s only possible to do the first time the variable is used,8087 * switching later is can be implemented but not done.8088 *8089 * @param pReNative The recompiler state.8090 * @param idxVar The variable.8091 * @throws VERR_IEM_VAR_IPE_28092 */8093 DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)8094 {8095 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);8096 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];8097 if (pVar->enmKind != kIemNativeVarKind_Stack)8098 {8099 /* We could in theory transition from immediate to stack as well, but it8100 would involve the caller doing work storing the value on the stack. So,8101 till that's required we only allow transition from invalid. 
*/8102 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));8103 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));8104 pVar->enmKind = kIemNativeVarKind_Stack;8105 8106 /* Note! We don't allocate a stack slot here, that's only done when a8107 slot is actually needed to hold a variable value. */8108 }8109 }8110 8111 8112 /**8113 * Sets it to a variable with a constant value.8114 *8115 * This does not require stack storage as we know the value and can always8116 * reload it, unless of course it's referenced.8117 *8118 * @param pReNative The recompiler state.8119 * @param idxVar The variable.8120 * @param uValue The immediate value.8121 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_28122 */8123 DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)8124 {8125 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);8126 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];8127 if (pVar->enmKind != kIemNativeVarKind_Immediate)8128 {8129 /* Only simple transitions for now. */8130 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));8131 pVar->enmKind = kIemNativeVarKind_Immediate;8132 }8133 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));8134 8135 pVar->u.uValue = uValue;8136 AssertMsg( pVar->cbVar >= sizeof(uint64_t)8137 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),8138 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));8139 }8140 8141 8142 /**8143 * Sets the variable to a reference (pointer) to @a idxOtherVar.8144 *8145 * This does not require stack storage as we know the value and can always8146 * reload it. Loading is postponed till needed.8147 *8148 * @param pReNative The recompiler state.8149 * @param idxVar The variable. Unpacked.8150 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.8151 *8152 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_28153 * @internal8154 */8155 static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)8156 {8157 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));8158 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));8159 8160 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)8161 {8162 /* Only simple transitions for now. */8163 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,8164 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));8165 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;8166 }8167 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));8168 8169 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */8170 8171 /* Update the other variable, ensure it's a stack variable. */8172 /** @todo handle variables with const values... that'll go boom now. */8173 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;8174 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));8175 }8176 8177 8178 /**8179 * Sets the variable to a reference (pointer) to a guest register reference.8180 *8181 * This does not require stack storage as we know the value and can always8182 * reload it. 
Loading is postponed till needed.8183 *8184 * @param pReNative The recompiler state.8185 * @param idxVar The variable.8186 * @param enmRegClass The class guest registers to reference.8187 * @param idxReg The register within @a enmRegClass to reference.8188 *8189 * @throws VERR_IEM_VAR_IPE_28190 */8191 DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,8192 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)8193 {8194 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);8195 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];8196 8197 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)8198 {8199 /* Only simple transitions for now. */8200 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));8201 pVar->enmKind = kIemNativeVarKind_GstRegRef;8202 }8203 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));8204 8205 pVar->u.GstRegRef.enmClass = enmRegClass;8206 pVar->u.GstRegRef.idx = idxReg;8207 }8208 8209 8210 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)8211 {8212 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));8213 }8214 8215 8216 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)8217 {8218 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));8219 8220 /* Since we're using a generic uint64_t value type, we must truncate it if8221 the variable is smaller otherwise we may end up with too large value when8222 scaling up a imm8 w/ sign-extension.8223 8224 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac608225 in the bios, bx=1) when running on arm, because clang expect 16-bit8226 register parameters to have bits 16 and up set to zero. Instead of8227 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong8228 CF value in the result. */8229 switch (cbType)8230 {8231 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;8232 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;8233 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;8234 }8235 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);8236 return idxVar;8237 }8238 8239 8240 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)8241 {8242 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);8243 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);8244 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)8245 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))8246 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,8247 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));8248 8249 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));8250 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);8251 return idxArgVar;8252 }8253 8254 8255 DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)8256 {8257 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));8258 /* Don't set to stack now, leave that to the first use as for instance8259 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). 
*/8260 return idxVar;8261 }8262 8263 8264 DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)8265 {8266 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));8267 8268 /* Since we're using a generic uint64_t value type, we must truncate it if8269 the variable is smaller otherwise we may end up with too large value when8270 scaling up a imm8 w/ sign-extension. */8271 switch (cbType)8272 {8273 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;8274 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;8275 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;8276 }8277 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);8278 return idxVar;8279 }8280 8281 8282 DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)8283 {8284 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));8285 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));8286 8287 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);8288 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);8289 8290 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);8291 8292 /* Truncate the value to this variables size. */8293 switch (cbType)8294 {8295 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;8296 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;8297 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;8298 }8299 8300 iemNativeVarRegisterRelease(pReNative, idxVarOther);8301 iemNativeVarRegisterRelease(pReNative, idxVar);8302 return idxVar;8303 }8304 8305 8306 /**8307 * Makes sure variable @a idxVar has a register assigned to it and that it stays8308 * fixed till we call iemNativeVarRegisterRelease.8309 *8310 * @returns The host register number.8311 * @param pReNative The recompiler state.8312 * @param idxVar The variable.8313 * @param poff Pointer to the instruction buffer offset.8314 * In case a register needs to be freed up or the value8315 * loaded off the stack.8316 * @param fInitialized Set if the variable must already have been initialized.8317 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not8318 * the case.8319 * @param idxRegPref Preferred register number or UINT8_MAX.8320 */8321 DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,8322 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)8323 {8324 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);8325 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];8326 Assert(pVar->cbVar <= 8);8327 Assert(!pVar->fRegAcquired);8328 8329 uint8_t idxReg = pVar->idxReg;8330 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))8331 {8332 Assert( pVar->enmKind > kIemNativeVarKind_Invalid8333 && pVar->enmKind < kIemNativeVarKind_End);8334 pVar->fRegAcquired = true;8335 return idxReg;8336 }8337 8338 /*8339 * If the kind of variable has not yet been set, default to 'stack'.8340 */8341 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid8342 && pVar->enmKind < kIemNativeVarKind_End);8343 if (pVar->enmKind == kIemNativeVarKind_Invalid)8344 
iemNativeVarSetKindToStack(pReNative, idxVar);8345 8346 /*8347 * We have to allocate a register for the variable, even if its a stack one8348 * as we don't know if there are modification being made to it before its8349 * finalized (todo: analyze and insert hints about that?).8350 *8351 * If we can, we try get the correct register for argument variables. This8352 * is assuming that most argument variables are fetched as close as possible8353 * to the actual call, so that there aren't any interfering hidden calls8354 * (memory accesses, etc) inbetween.8355 *8356 * If we cannot or it's a variable, we make sure no argument registers8357 * that will be used by this MC block will be allocated here, and we always8358 * prefer non-volatile registers to avoid needing to spill stuff for internal8359 * call.8360 */8361 /** @todo Detect too early argument value fetches and warn about hidden8362 * calls causing less optimal code to be generated in the python script. */8363 8364 uint8_t const uArgNo = pVar->uArgNo;8365 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)8366 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))8367 {8368 idxReg = g_aidxIemNativeCallRegs[uArgNo];8369 8370 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK8371 /* Writeback any dirty shadow registers we are about to unshadow. */8372 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);8373 #endif8374 8375 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);8376 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));8377 }8378 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)8379 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))8380 {8381 /** @todo there must be a better way for this and boot cArgsX? */8382 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];8383 uint32_t const fRegs = ~pReNative->Core.bmHstRegs8384 & ~pReNative->Core.bmHstRegsWithGstShadow8385 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)8386 & fNotArgsMask;8387 if (fRegs)8388 {8389 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */8390 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK8391 ? 
fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;8392 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);8393 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));8394 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));8395 }8396 else8397 {8398 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,8399 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);8400 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));8401 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));8402 }8403 }8404 else8405 {8406 idxReg = idxRegPref;8407 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);8408 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));8409 }8410 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);8411 pVar->idxReg = idxReg;8412 8413 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR8414 pVar->fSimdReg = false;8415 #endif8416 8417 /*8418 * Load it off the stack if we've got a stack slot.8419 */8420 uint8_t const idxStackSlot = pVar->idxStackSlot;8421 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)8422 {8423 Assert(fInitialized);8424 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);8425 switch (pVar->cbVar)8426 {8427 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;8428 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;8429 case 3: AssertFailed(); RT_FALL_THRU();8430 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;8431 default: AssertFailed(); RT_FALL_THRU();8432 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;8433 }8434 }8435 else8436 {8437 Assert(idxStackSlot == UINT8_MAX);8438 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));8439 }8440 pVar->fRegAcquired = true;8441 return idxReg;8442 }8443 8444 8445 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR8446 /**8447 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays8448 * fixed till we call iemNativeVarRegisterRelease.8449 *8450 * @returns The host register number.8451 * @param pReNative The recompiler state.8452 * @param idxVar The variable.8453 * @param poff Pointer to the instruction buffer offset.8454 * In case a register needs to be freed up or the value8455 * loaded off the stack.8456 * @param fInitialized Set if the variable must already have been initialized.8457 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not8458 * the case.8459 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.8460 */8461 DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,8462 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)8463 {8464 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);8465 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];8466 Assert( pVar->cbVar == sizeof(RTUINT128U)8467 || pVar->cbVar == sizeof(RTUINT256U));8468 Assert(!pVar->fRegAcquired);8469 8470 uint8_t idxReg = pVar->idxReg;8471 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))8472 {8473 Assert( pVar->enmKind > kIemNativeVarKind_Invalid8474 && pVar->enmKind < kIemNativeVarKind_End);8475 pVar->fRegAcquired = true;8476 return idxReg;8477 }8478 8479 /*8480 
* If the kind of variable has not yet been set, default to 'stack'.8481 */8482 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid8483 && pVar->enmKind < kIemNativeVarKind_End);8484 if (pVar->enmKind == kIemNativeVarKind_Invalid)8485 iemNativeVarSetKindToStack(pReNative, idxVar);8486 8487 /*8488 * We have to allocate a register for the variable, even if its a stack one8489 * as we don't know if there are modification being made to it before its8490 * finalized (todo: analyze and insert hints about that?).8491 *8492 * If we can, we try get the correct register for argument variables. This8493 * is assuming that most argument variables are fetched as close as possible8494 * to the actual call, so that there aren't any interfering hidden calls8495 * (memory accesses, etc) inbetween.8496 *8497 * If we cannot or it's a variable, we make sure no argument registers8498 * that will be used by this MC block will be allocated here, and we always8499 * prefer non-volatile registers to avoid needing to spill stuff for internal8500 * call.8501 */8502 /** @todo Detect too early argument value fetches and warn about hidden8503 * calls causing less optimal code to be generated in the python script. */8504 8505 uint8_t const uArgNo = pVar->uArgNo;8506 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */8507 8508 /* SIMD is bit simpler for now because there is no support for arguments. */8509 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)8510 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))8511 {8512 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];8513 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs8514 & ~pReNative->Core.bmHstSimdRegsWithGstShadow8515 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)8516 & fNotArgsMask;8517 if (fRegs)8518 {8519 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK8520 ? 
fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;8521 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);8522 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));8523 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));8524 }8525 else8526 {8527 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,8528 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);8529 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));8530 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));8531 }8532 }8533 else8534 {8535 idxReg = idxRegPref;8536 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);8537 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));8538 }8539 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);8540 8541 pVar->fSimdReg = true;8542 pVar->idxReg = idxReg;8543 8544 /*8545 * Load it off the stack if we've got a stack slot.8546 */8547 uint8_t const idxStackSlot = pVar->idxStackSlot;8548 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)8549 {8550 Assert(fInitialized);8551 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);8552 switch (pVar->cbVar)8553 {8554 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;8555 default: AssertFailed(); RT_FALL_THRU();8556 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;8557 }8558 }8559 else8560 {8561 Assert(idxStackSlot == UINT8_MAX);8562 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));8563 }8564 pVar->fRegAcquired = true;8565 return idxReg;8566 }8567 #endif8568 8569 8570 /**8571 * The value of variable @a idxVar will be written in full to the @a enmGstReg8572 * guest register.8573 *8574 * This function makes sure there is a register for it and sets it to be the8575 * current shadow copy of @a enmGstReg.8576 *8577 * @returns The host register number.8578 * @param pReNative The recompiler state.8579 * @param idxVar The variable.8580 * @param enmGstReg The guest register this variable will be written to8581 * after this call.8582 * @param poff Pointer to the instruction buffer offset.8583 * In case a register needs to be freed up or if the8584 * variable content needs to be loaded off the stack.8585 *8586 * @note We DO NOT expect @a idxVar to be an argument variable,8587 * because we can only in the commit stage of an instruction when this8588 * function is used.8589 */8590 DECL_HIDDEN_THROW(uint8_t)8591 iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)8592 {8593 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);8594 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];8595 Assert(!pVar->fRegAcquired);8596 AssertMsgStmt( pVar->cbVar <= 88597 && ( pVar->enmKind == kIemNativeVarKind_Immediate8598 || pVar->enmKind == kIemNativeVarKind_Stack),8599 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,8600 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),8601 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));8602 8603 /*8604 * This shouldn't ever be used for arguments, unless it's in a weird else8605 * branch that doesn't do any 
calling and even then it's questionable.8606 *8607 * However, in case someone writes crazy wrong MC code and does register8608 * updates before making calls, just use the regular register allocator to8609 * ensure we get a register suitable for the intended argument number.8610 */8611 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));8612 8613 /*8614 * If there is already a register for the variable, we transfer/set the8615 * guest shadow copy assignment to it.8616 */8617 uint8_t idxReg = pVar->idxReg;8618 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))8619 {8620 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK8621 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)8622 {8623 # ifdef IEMNATIVE_WITH_TB_DEBUG_INFO8624 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);8625 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);8626 # endif8627 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);8628 }8629 #endif8630 8631 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))8632 {8633 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];8634 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);8635 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",8636 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));8637 }8638 else8639 {8640 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);8641 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",8642 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));8643 }8644 /** @todo figure this one out. We need some way of making sure the register isn't8645 * modified after this point, just in case we start writing crappy MC code. 
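The transfer/mark logic right above is plain bitmap bookkeeping: each guest register has at most one host shadow, tracked by a guest-register bitmap plus a guest-to-host index table, while each host register remembers the set of guest registers it shadows. A minimal sketch of that idea, using made-up names and a simplified layout rather than the real IEMRECOMPILERSTATE:

    #include <cstdint>
    #include <cassert>

    struct ShadowState
    {
        uint64_t bmGstShadowed;        /* bit N set => guest reg N has a host shadow */
        uint8_t  aidxGstToHst[64];     /* guest reg -> host reg currently shadowing it */
        uint64_t afHstShadows[16];     /* host reg -> set of guest regs it shadows */
    };

    /* Mark idxHst as the current shadow copy of guest register idxGst. */
    static void MarkShadow(ShadowState &S, uint8_t idxHst, uint8_t idxGst)
    {
        S.bmGstShadowed        |= UINT64_C(1) << idxGst;
        S.aidxGstToHst[idxGst]  = idxHst;
        S.afHstShadows[idxHst] |= UINT64_C(1) << idxGst;
    }

    /* Move the shadowing of idxGst from its old host register to idxHstNew. */
    static void TransferShadow(ShadowState &S, uint8_t idxHstNew, uint8_t idxGst)
    {
        assert(S.bmGstShadowed & (UINT64_C(1) << idxGst));
        uint8_t const idxHstOld = S.aidxGstToHst[idxGst];
        S.afHstShadows[idxHstOld] &= ~(UINT64_C(1) << idxGst);
        MarkShadow(S, idxHstNew, idxGst);
    }

Transferring a shadow is only a matter of clearing the bit in the old host register's set and re-marking it on the new one; no instructions are emitted for it.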
*/8646 pVar->enmGstReg = enmGstReg;8647 pVar->fRegAcquired = true;8648 return idxReg;8649 }8650 Assert(pVar->uArgNo == UINT8_MAX);8651 8652 /*8653 * Because this is supposed to be the commit stage, we're just tag along with the8654 * temporary register allocator and upgrade it to a variable register.8655 */8656 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);8657 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);8658 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);8659 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;8660 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;8661 pVar->idxReg = idxReg;8662 8663 /*8664 * Now we need to load the register value.8665 */8666 if (pVar->enmKind == kIemNativeVarKind_Immediate)8667 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);8668 else8669 {8670 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);8671 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);8672 switch (pVar->cbVar)8673 {8674 case sizeof(uint64_t):8675 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);8676 break;8677 case sizeof(uint32_t):8678 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);8679 break;8680 case sizeof(uint16_t):8681 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);8682 break;8683 case sizeof(uint8_t):8684 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);8685 break;8686 default:8687 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));8688 }8689 }8690 8691 pVar->fRegAcquired = true;8692 return idxReg;8693 }8694 8695 8696 /**8697 * Emit code to save volatile registers prior to a call to a helper (TLB miss).8698 *8699 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and8700 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the8701 * requirement of flushing anything in volatile host registers when making a8702 * call.8703 *8704 * @returns New @a off value.8705 * @param pReNative The recompiler state.8706 * @param off The code buffer position.8707 * @param fHstRegsNotToSave Set of registers not to save & restore.8708 */8709 DECL_HIDDEN_THROW(uint32_t)8710 iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)8711 {8712 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;8713 if (fHstRegs)8714 {8715 do8716 {8717 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;8718 fHstRegs &= ~RT_BIT_32(idxHstReg);8719 8720 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)8721 {8722 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;8723 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);8724 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)8725 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))8726 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,8727 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));8728 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)8729 {8730 case kIemNativeVarKind_Stack:8731 {8732 /* Temporarily spill the variable register. 
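The save/restore pair documented above (iemNativeVarSaveVolatileRegsPreHlpCall and its post-call counterpart) follows a simple pattern: walk the mask of live call-volatile registers, spill each variable-held register to its frame slot before the helper call, and reload it afterwards. A rough, self-contained sketch with placeholder emitters (EmitStoreGprByBp/EmitLoadGprByBp are stand-ins, not the real iemNativeEmit* API):

    #include <cstdint>
    #include <cstdio>

    /* Placeholder emitters that just log what would be generated. */
    static uint32_t EmitStoreGprByBp(uint32_t off, int32_t offBp, unsigned iReg)
    { std::printf("%#x: mov [bp%+d], r%u\n", (unsigned)off, (int)offBp, iReg); return off + 1; }
    static uint32_t EmitLoadGprByBp(uint32_t off, unsigned iReg, int32_t offBp)
    { std::printf("%#x: mov r%u, [bp%+d]\n", (unsigned)off, iReg, (int)offBp); return off + 1; }

    /* Illustrative frame layout: 8-byte slots growing down from the frame pointer. */
    static int32_t CalcBpDisp(uint8_t idxSlot) { return -8 * (int32_t)(idxSlot + 1); }

    /* Spill (fSave) or reload (!fSave) every register in fVolatileLive to/from its slot. */
    static uint32_t SpillOrReloadVolatile(uint32_t off, uint32_t fVolatileLive,
                                          const uint8_t aidxSlot[32], bool fSave)
    {
        while (fVolatileLive)
        {
            unsigned const iReg = (unsigned)__builtin_ctz(fVolatileLive); /* lowest set bit (GCC/Clang) */
            fVolatileLive &= fVolatileLive - 1;                           /* clear it */
            int32_t const offBp = CalcBpDisp(aidxSlot[iReg]);
            off = fSave ? EmitStoreGprByBp(off, offBp, iReg) : EmitLoadGprByBp(off, iReg, offBp);
        }
        return off;
    }

A TLB-miss path then only needs one such pass on each side of the helper invocation instead of flushing every live register up front.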
*/8733 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);8734 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",8735 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));8736 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);8737 continue;8738 }8739 8740 case kIemNativeVarKind_Immediate:8741 case kIemNativeVarKind_VarRef:8742 case kIemNativeVarKind_GstRegRef:8743 /* It is weird to have any of these loaded at this point. */8744 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));8745 continue;8746 8747 case kIemNativeVarKind_End:8748 case kIemNativeVarKind_Invalid:8749 break;8750 }8751 AssertFailed();8752 }8753 else8754 {8755 /*8756 * Allocate a temporary stack slot and spill the register to it.8757 */8758 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;8759 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,8760 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));8761 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);8762 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;8763 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",8764 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));8765 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);8766 }8767 } while (fHstRegs);8768 }8769 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR8770 8771 /*8772 * Guest register shadows are flushed to CPUMCTX at the moment and don't need allocating a stack slot8773 * which would be more difficult due to spanning multiple stack slots and different sizes8774 * (besides we only have a limited amount of slots at the moment).8775 *8776 * However the shadows need to be flushed out as the guest SIMD register might get corrupted by8777 * the callee. This asserts that the registers were written back earlier and are not in the dirty state.8778 */8779 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);8780 8781 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;8782 if (fHstRegs)8783 {8784 do8785 {8786 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;8787 fHstRegs &= ~RT_BIT_32(idxHstReg);8788 8789 /* Fixed reserved and temporary registers don't need saving. 
*/8790 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved8791 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)8792 continue;8793 8794 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);8795 8796 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;8797 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);8798 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)8799 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))8800 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg8801 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg8802 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)8803 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),8804 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));8805 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)8806 {8807 case kIemNativeVarKind_Stack:8808 {8809 /* Temporarily spill the variable register. */8810 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;8811 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);8812 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",8813 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));8814 if (cbVar == sizeof(RTUINT128U))8815 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);8816 else8817 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);8818 continue;8819 }8820 8821 case kIemNativeVarKind_Immediate:8822 case kIemNativeVarKind_VarRef:8823 case kIemNativeVarKind_GstRegRef:8824 /* It is weird to have any of these loaded at this point. 
*/
8825 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8826 continue;
8827 
8828 case kIemNativeVarKind_End:
8829 case kIemNativeVarKind_Invalid:
8830 break;
8831 }
8832 AssertFailed();
8833 } while (fHstRegs);
8834 }
8835 #endif
8836 return off;
8837 }
8838 
8839 
8840 /**
8841 * Emit code to restore volatile registers after a call to a helper.
8842 *
8843 * @returns New @a off value.
8844 * @param pReNative The recompiler state.
8845 * @param off The code buffer position.
8846 * @param fHstRegsNotToSave Set of registers not to save & restore.
8847 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8848 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8849 */
8850 DECL_HIDDEN_THROW(uint32_t)
8851 iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8852 {
8853 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8854 if (fHstRegs)
8855 {
8856 do
8857 {
8858 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8859 fHstRegs &= ~RT_BIT_32(idxHstReg);
8860 
8861 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8862 {
8863 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8864 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8865 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8866 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8867 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8868 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8869 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8870 {
8871 case kIemNativeVarKind_Stack:
8872 {
8873 /* Unspill the variable register. */
8874 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8875 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8876 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8877 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8878 continue;
8879 }
8880 
8881 case kIemNativeVarKind_Immediate:
8882 case kIemNativeVarKind_VarRef:
8883 case kIemNativeVarKind_GstRegRef:
8884 /* It is weird to have any of these loaded at this point.
*/8885 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));8886 continue;8887 8888 case kIemNativeVarKind_End:8889 case kIemNativeVarKind_Invalid:8890 break;8891 }8892 AssertFailed();8893 }8894 else8895 {8896 /*8897 * Restore from temporary stack slot.8898 */8899 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;8900 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));8901 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);8902 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;8903 8904 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));8905 }8906 } while (fHstRegs);8907 }8908 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR8909 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;8910 if (fHstRegs)8911 {8912 do8913 {8914 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;8915 fHstRegs &= ~RT_BIT_32(idxHstReg);8916 8917 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp8918 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)8919 continue;8920 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);8921 8922 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;8923 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);8924 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)8925 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))8926 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg8927 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg8928 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)8929 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),8930 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));8931 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)8932 {8933 case kIemNativeVarKind_Stack:8934 {8935 /* Unspill the variable register. */8936 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;8937 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);8938 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",8939 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));8940 8941 if (cbVar == sizeof(RTUINT128U))8942 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));8943 else8944 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));8945 continue;8946 }8947 8948 case kIemNativeVarKind_Immediate:8949 case kIemNativeVarKind_VarRef:8950 case kIemNativeVarKind_GstRegRef:8951 /* It is weird to have any of these loaded at this point. 
*/8952 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));8953 continue;8954 8955 case kIemNativeVarKind_End:8956 case kIemNativeVarKind_Invalid:8957 break;8958 }8959 AssertFailed();8960 } while (fHstRegs);8961 }8962 #endif8963 return off;8964 }8965 8966 8967 /**8968 * Worker that frees the stack slots for variable @a idxVar if any allocated.8969 *8970 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.8971 *8972 * ASSUMES that @a idxVar is valid and unpacked.8973 */8974 DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)8975 {8976 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */8977 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;8978 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)8979 {8980 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;8981 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);8982 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);8983 Assert(cSlots > 0);8984 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);8985 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",8986 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));8987 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);8988 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;8989 }8990 else8991 Assert(idxStackSlot == UINT8_MAX);8992 }8993 8994 8995 /**8996 * Worker that frees a single variable.8997 *8998 * ASSUMES that @a idxVar is valid and unpacked.8999 */9000 DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)9001 {9002 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */9003 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */9004 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);9005 9006 /* Free the host register first if any assigned. */9007 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;9008 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR9009 if ( idxHstReg != UINT8_MAX9010 && pReNative->Core.aVars[idxVar].fSimdReg)9011 {9012 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));9013 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));9014 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;9015 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);9016 }9017 else9018 #endif9019 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))9020 {9021 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));9022 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;9023 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);9024 }9025 9026 /* Free argument mapping. */9027 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;9028 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))9029 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;9030 9031 /* Free the stack slots. */9032 iemNativeVarFreeStackSlots(pReNative, idxVar);9033 9034 /* Free the actual variable. 
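The slot accounting used by iemNativeVarFreeStackSlots above (and by the allocation side in iemNativeVarGetStackSlot) is a small bitmap allocator: a variable of cbVar bytes occupies cbVar/8 consecutive 8-byte slots, and freeing simply clears the same mask again. A condensed sketch of the scheme, assuming 32 slots and power-of-two sizes:

    #include <cstdint>

    enum { kCbSlot = 8, kcSlots = 32 };

    /* Allocate cSlots consecutive, cSlots-aligned slots in *pbmStack; returns slot index or UINT8_MAX. */
    static uint8_t AllocStackSlots(uint32_t *pbmStack, uint8_t cbVar)
    {
        uint8_t  const cSlots     = (uint8_t)((cbVar + kCbSlot - 1) / kCbSlot);
        uint32_t const fAllocMask = (uint32_t)((UINT64_C(1) << cSlots) - 1);
        for (uint8_t idxSlot = 0; idxSlot + cSlots <= kcSlots; idxSlot = (uint8_t)(idxSlot + cSlots))
            if (!(*pbmStack & (fAllocMask << idxSlot)))
            {
                *pbmStack |= fAllocMask << idxSlot;   /* mark the whole run as used */
                return idxSlot;
            }
        return UINT8_MAX;                             /* out of stack slots */
    }

    /* Free the run again; the caller remembers idxSlot and cbVar, just like aVars[] does. */
    static void FreeStackSlots(uint32_t *pbmStack, uint8_t idxSlot, uint8_t cbVar)
    {
        uint8_t  const cSlots     = (uint8_t)((cbVar + kCbSlot - 1) / kCbSlot);
        uint32_t const fAllocMask = (uint32_t)((UINT64_C(1) << cSlots) - 1);
        *pbmStack &= ~(fAllocMask << idxSlot);
    }

Stepping the search by cSlots keeps multi-slot runs naturally aligned in this sketch, which is what makes the shifted-mask test on free sufficient.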
*/9035 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;9036 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);9037 }9038 9039 9040 /**9041 * Worker for iemNativeVarFreeAll that's called when there is anything to do.9042 */9043 DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)9044 {9045 while (bmVars != 0)9046 {9047 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;9048 bmVars &= ~RT_BIT_32(idxVar);9049 9050 #if 1 /** @todo optimize by simplifying this later... */9051 iemNativeVarFreeOneWorker(pReNative, idxVar);9052 #else9053 /* Only need to free the host register, the rest is done as bulk updates below. */9054 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;9055 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))9056 {9057 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));9058 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;9059 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);9060 }9061 #endif9062 }9063 #if 0 /** @todo optimize by simplifying this later... */9064 pReNative->Core.bmVars = 0;9065 pReNative->Core.bmStack = 0;9066 pReNative->Core.u64ArgVars = UINT64_MAX;9067 #endif9068 }9069 9070 9071 9072 /*********************************************************************************************************************************9073 * Emitters for IEM_MC_CALL_CIMPL_XXX *9074 *********************************************************************************************************************************/9075 9076 /**9077 * Emits code to load a reference to the given guest register into @a idxGprDst.9078 */9079 DECL_HIDDEN_THROW(uint32_t)9080 iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,9081 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)9082 {9083 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING9084 /** @todo If we ever gonna allow referencing the RIP register we need to update guest value here. */9085 #endif9086 9087 /*9088 * Get the offset relative to the CPUMCTX structure.9089 */9090 uint32_t offCpumCtx;9091 switch (enmClass)9092 {9093 case kIemNativeGstRegRef_Gpr:9094 Assert(idxRegInClass < 16);9095 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);9096 break;9097 9098 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/9099 Assert(idxRegInClass < 4);9100 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);9101 break;9102 9103 case kIemNativeGstRegRef_EFlags:9104 Assert(idxRegInClass == 0);9105 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);9106 break;9107 9108 case kIemNativeGstRegRef_MxCsr:9109 Assert(idxRegInClass == 0);9110 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);9111 break;9112 9113 case kIemNativeGstRegRef_FpuReg:9114 Assert(idxRegInClass < 8);9115 AssertFailed(); /** @todo what kind of indexing? */9116 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);9117 break;9118 9119 case kIemNativeGstRegRef_MReg:9120 Assert(idxRegInClass < 8);9121 AssertFailed(); /** @todo what kind of indexing? */9122 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);9123 break;9124 9125 case kIemNativeGstRegRef_XReg:9126 Assert(idxRegInClass < 16);9127 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);9128 break;9129 9130 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. 
*/9131 Assert(idxRegInClass == 0);9132 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);9133 break;9134 9135 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */9136 Assert(idxRegInClass == 0);9137 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);9138 break;9139 9140 default:9141 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));9142 }9143 9144 /*9145 * Load the value into the destination register.9146 */9147 #ifdef RT_ARCH_AMD649148 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));9149 9150 #elif defined(RT_ARCH_ARM64)9151 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);9152 Assert(offCpumCtx < 4096);9153 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);9154 9155 #else9156 # error "Port me!"9157 #endif9158 9159 return off;9160 }9161 9162 9163 /**9164 * Common code for CIMPL and AIMPL calls.9165 *9166 * These are calls that uses argument variables and such. They should not be9167 * confused with internal calls required to implement an MC operation,9168 * like a TLB load and similar.9169 *9170 * Upon return all that is left to do is to load any hidden arguments and9171 * perform the call. All argument variables are freed.9172 *9173 * @returns New code buffer offset; throws VBox status code on error.9174 * @param pReNative The native recompile state.9175 * @param off The code buffer offset.9176 * @param cArgs The total nubmer of arguments (includes hidden9177 * count).9178 * @param cHiddenArgs The number of hidden arguments. The hidden9179 * arguments must not have any variable declared for9180 * them, whereas all the regular arguments must9181 * (tstIEMCheckMc ensures this).9182 * @param fFlushPendingWrites Flag whether to flush pending writes (default true),9183 * this will still flush pending writes in call volatile registers if false.9184 */9185 DECL_HIDDEN_THROW(uint32_t)9186 iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,9187 bool fFlushPendingWrites /*= true*/)9188 {9189 #ifdef VBOX_STRICT9190 /*9191 * Assert sanity.9192 */9193 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);9194 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);9195 for (unsigned i = 0; i < cHiddenArgs; i++)9196 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);9197 for (unsigned i = cHiddenArgs; i < cArgs; i++)9198 {9199 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */9200 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));9201 }9202 iemNativeRegAssertSanity(pReNative);9203 #endif9204 9205 /* We don't know what the called function makes use of, so flush any pending register writes. 
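With IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK the flush mentioned above boils down to walking the dirty-shadow bitmap and storing each dirty host copy back to its CPUMCTX field, so the helper sees an up-to-date guest context. A bare-bones sketch of that loop, with stand-in names for the emitter and the offset table:

    #include <cstdint>
    #include <cstdio>

    /* Stand-in: emit a store of host register iHstReg to the guest context at offCtx. */
    static uint32_t EmitStoreToCtx(uint32_t off, unsigned iHstReg, uint32_t offCtx)
    { std::printf("%#x: mov [pCtx+%#x], r%u\n", (unsigned)off, (unsigned)offCtx, iHstReg); return off + 1; }

    struct MiniState
    {
        uint64_t bmGstRegShadowDirty;   /* guest regs whose host shadow is newer than CPUMCTX */
        uint8_t  aidxGstToHst[64];      /* guest reg -> host reg holding the shadow */
        uint32_t aoffGstRegInCtx[64];   /* guest reg -> byte offset inside the context */
    };

    /* Write every dirty shadow back before calling out. */
    static uint32_t FlushDirtyGuestRegs(MiniState &S, uint32_t off)
    {
        while (S.bmGstRegShadowDirty)
        {
            unsigned const iGstReg = (unsigned)__builtin_ctzll(S.bmGstRegShadowDirty); /* GCC/Clang */
            S.bmGstRegShadowDirty &= S.bmGstRegShadowDirty - 1;  /* clear lowest bit */
            off = EmitStoreToCtx(off, S.aidxGstToHst[iGstReg], S.aoffGstRegInCtx[iGstReg]);
        }
        return off;
    }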
*/9206 RT_NOREF(fFlushPendingWrites);9207 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK9208 if (fFlushPendingWrites)9209 #endif9210 off = iemNativeRegFlushPendingWrites(pReNative, off);9211 9212 /*9213 * Before we do anything else, go over variables that are referenced and9214 * make sure they are not in a register.9215 */9216 uint32_t bmVars = pReNative->Core.bmVars;9217 if (bmVars)9218 {9219 do9220 {9221 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;9222 bmVars &= ~RT_BIT_32(idxVar);9223 9224 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)9225 {9226 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;9227 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR9228 if ( idxRegOld != UINT8_MAX9229 && pReNative->Core.aVars[idxVar].fSimdReg)9230 {9231 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));9232 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));9233 9234 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));9235 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",9236 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,9237 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));9238 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))9239 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);9240 else9241 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);9242 9243 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)9244 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));9245 9246 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;9247 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);9248 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);9249 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;9250 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;9251 }9252 else9253 #endif9254 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))9255 {9256 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));9257 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",9258 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,9259 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));9260 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);9261 9262 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;9263 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);9264 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);9265 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;9266 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;9267 }9268 }9269 } while (bmVars != 0);9270 #if 0 //def VBOX_STRICT9271 iemNativeRegAssertSanity(pReNative);9272 #endif9273 }9274 9275 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));9276 9277 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK9278 /*9279 * At the very first step go over the host registers that will be used for arguments9280 * don't shadow anything which needs 
writing back first.9281 */9282 for (uint32_t i = 0; i < cRegArgs; i++)9283 {9284 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];9285 9286 /* Writeback any dirty guest shadows before using this register. */9287 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)9288 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);9289 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));9290 }9291 #endif9292 9293 /*9294 * First, go over the host registers that will be used for arguments and make9295 * sure they either hold the desired argument or are free.9296 */9297 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])9298 {9299 for (uint32_t i = 0; i < cRegArgs; i++)9300 {9301 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];9302 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))9303 {9304 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)9305 {9306 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;9307 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);9308 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];9309 Assert(pVar->idxReg == idxArgReg);9310 uint8_t const uArgNo = pVar->uArgNo;9311 if (uArgNo == i)9312 { /* prefect */ }9313 /* The variable allocator logic should make sure this is impossible,9314 except for when the return register is used as a parameter (ARM,9315 but not x86). */9316 #if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK9317 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)9318 {9319 # ifdef IEMNATIVE_FP_OFF_STACK_ARG09320 # error "Implement this"9321 # endif9322 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);9323 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];9324 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),9325 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));9326 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");9327 }9328 #endif9329 else9330 {9331 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));9332 9333 if (pVar->enmKind == kIemNativeVarKind_Stack)9334 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);9335 else9336 {9337 /* just free it, can be reloaded if used again */9338 pVar->idxReg = UINT8_MAX;9339 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);9340 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);9341 }9342 }9343 }9344 else9345 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,9346 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));9347 }9348 }9349 #if 0 //def VBOX_STRICT9350 iemNativeRegAssertSanity(pReNative);9351 #endif9352 }9353 9354 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. 
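The conflict resolution above leans on a fixed table mapping argument number to host call register (g_aidxIemNativeCallRegs) plus cumulative register masks (g_afIemNativeCallRegs). The essence of those tables, shown here for the SysV AMD64 convention purely as an illustration:

    #include <cstdint>

    /* SysV AMD64 integer argument order: rdi, rsi, rdx, rcx, r8, r9 (example only). */
    static const uint8_t g_aidxCallRegs[6] = { 7, 6, 2, 1, 8, 9 };

    /* Cumulative mask: the registers occupied by the first cArgs arguments. */
    static uint32_t CallRegMask(unsigned cArgs)
    {
        uint32_t fMask = 0;
        for (unsigned i = 0; i < cArgs && i < 6; i++)
            fMask |= UINT32_C(1) << g_aidxCallRegs[i];
        return fMask;
    }

    /* True if host register idxReg is needed by one of the first cArgs arguments. */
    static bool IsArgReg(unsigned idxReg, unsigned cArgs)
    {
        return (CallRegMask(cArgs) & (UINT32_C(1) << idxReg)) != 0;
    }

With such tables, "is this host register needed for argument i" and "which registers do the first n arguments occupy" are both single mask tests, which is what the loops here rely on.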
*/9355 9356 #ifdef IEMNATIVE_FP_OFF_STACK_ARG09357 /*9358 * If there are any stack arguments, make sure they are in their place as well.9359 *9360 * We can use IEMNATIVE_CALL_ARG0_GREG as temporary register since we'll (or9361 * the caller) be loading it later and it must be free (see first loop).9362 */9363 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)9364 {9365 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)9366 {9367 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */9368 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];9369 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))9370 {9371 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */9372 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);9373 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);9374 pVar->idxReg = UINT8_MAX;9375 }9376 else9377 {9378 /* Use ARG0 as temp for stuff we need registers for. */9379 switch (pVar->enmKind)9380 {9381 case kIemNativeVarKind_Stack:9382 {9383 uint8_t const idxStackSlot = pVar->idxStackSlot;9384 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));9385 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,9386 iemNativeStackCalcBpDisp(idxStackSlot));9387 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);9388 continue;9389 }9390 9391 case kIemNativeVarKind_Immediate:9392 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);9393 continue;9394 9395 case kIemNativeVarKind_VarRef:9396 {9397 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */9398 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));9399 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));9400 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);9401 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;9402 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR9403 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;9404 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;9405 if ( fSimdReg9406 && idxRegOther != UINT8_MAX)9407 {9408 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));9409 if (cbVar == sizeof(RTUINT128U))9410 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);9411 else9412 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);9413 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */9414 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);9415 }9416 else9417 # endif9418 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))9419 {9420 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);9421 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? 
*/9422 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);9423 }9424 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX9425 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);9426 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);9427 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);9428 continue;9429 }9430 9431 case kIemNativeVarKind_GstRegRef:9432 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,9433 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);9434 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);9435 continue;9436 9437 case kIemNativeVarKind_Invalid:9438 case kIemNativeVarKind_End:9439 break;9440 }9441 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));9442 }9443 }9444 # if 0 //def VBOX_STRICT9445 iemNativeRegAssertSanity(pReNative);9446 # endif9447 }9448 #else9449 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);9450 #endif9451 9452 /*9453 * Make sure the argument variables are loaded into their respective registers.9454 *9455 * We can optimize this by ASSUMING that any register allocations are for9456 * registeres that have already been loaded and are ready. The previous step9457 * saw to that.9458 */9459 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))9460 {9461 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)9462 {9463 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];9464 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))9465 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])9466 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i9467 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);9468 else9469 {9470 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */9471 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))9472 {9473 Assert(pVar->enmKind == kIemNativeVarKind_Stack);9474 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);9475 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))9476 | RT_BIT_32(idxArgReg);9477 pVar->idxReg = idxArgReg;9478 }9479 else9480 {9481 /* Use ARG0 as temp for stuff we need registers for. 
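Each argument variable is then materialized according to its kind, exactly as in the switch that follows (and the stack-argument variant above): stack variables are loaded from their BP-relative slot, immediates become constant loads, and references become the address of the referenced variable's slot. A compact sketch with placeholder emitters:

    #include <cstdint>
    #include <cstdio>

    enum VarKind { kStack, kImmediate, kVarRef };
    struct MiniVar { VarKind enmKind; int32_t offBp; uint64_t uValue; };

    /* Placeholder emitters that just log what would be generated. */
    static uint32_t EmitLoadRegByBp(uint32_t off, unsigned iReg, int32_t offBp)
    { std::printf("%#x: mov r%u, [bp%+d]\n", (unsigned)off, iReg, (int)offBp); return off + 1; }
    static uint32_t EmitLoadRegImm64(uint32_t off, unsigned iReg, uint64_t uImm)
    { std::printf("%#x: mov r%u, %#llx\n", (unsigned)off, iReg, (unsigned long long)uImm); return off + 1; }
    static uint32_t EmitLeaRegByBp(uint32_t off, unsigned iReg, int32_t offBp)
    { std::printf("%#x: lea r%u, [bp%+d]\n", (unsigned)off, iReg, (int)offBp); return off + 1; }

    /* Load argument variable pVar into the argument register iArgReg. */
    static uint32_t LoadArgIntoReg(uint32_t off, unsigned iArgReg, const MiniVar *pVar)
    {
        switch (pVar->enmKind)
        {
            case kStack:     return EmitLoadRegByBp(off, iArgReg, pVar->offBp);
            case kImmediate: return EmitLoadRegImm64(off, iArgReg, pVar->uValue);
            case kVarRef:    return EmitLeaRegByBp(off, iArgReg, pVar->offBp); /* address of the spilled referee */
        }
        return off;
    }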
*/9482 switch (pVar->enmKind)9483 {9484 case kIemNativeVarKind_Stack:9485 {9486 uint8_t const idxStackSlot = pVar->idxStackSlot;9487 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));9488 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));9489 continue;9490 }9491 9492 case kIemNativeVarKind_Immediate:9493 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);9494 continue;9495 9496 case kIemNativeVarKind_VarRef:9497 {9498 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */9499 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));9500 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,9501 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));9502 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);9503 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;9504 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR9505 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;9506 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;9507 if ( fSimdReg9508 && idxRegOther != UINT8_MAX)9509 {9510 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));9511 if (cbVar == sizeof(RTUINT128U))9512 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);9513 else9514 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);9515 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */9516 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);9517 }9518 else9519 #endif9520 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))9521 {9522 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);9523 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */9524 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);9525 }9526 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX9527 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);9528 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);9529 continue;9530 }9531 9532 case kIemNativeVarKind_GstRegRef:9533 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,9534 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);9535 continue;9536 9537 case kIemNativeVarKind_Invalid:9538 case kIemNativeVarKind_End:9539 break;9540 }9541 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));9542 }9543 }9544 }9545 #if 0 //def VBOX_STRICT9546 iemNativeRegAssertSanity(pReNative);9547 #endif9548 }9549 #ifdef VBOX_STRICT9550 else9551 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)9552 {9553 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);9554 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);9555 }9556 #endif9557 9558 /*9559 * Free all argument variables (simplified).9560 * Their lifetime always expires with the call they are for.9561 */9562 /** @todo Make the python script check that arguments aren't used after9563 * IEM_MC_CALL_XXXX. */9564 /** @todo There is a special with IEM_MC_MEM_MAP_U16_RW and friends requiring9565 * a IEM_MC_MEM_COMMIT_AND_UNMAP_RW after a AIMPL call typically with9566 * an argument value. There is also some FPU stuff. 
*/9567 for (uint32_t i = cHiddenArgs; i < cArgs; i++)9568 {9569 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */9570 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));9571 9572 /* no need to free registers: */9573 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT9574 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]9575 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX9576 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,9577 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,9578 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));9579 9580 pReNative->Core.aidxArgVars[i] = UINT8_MAX;9581 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);9582 iemNativeVarFreeStackSlots(pReNative, idxVar);9583 }9584 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);9585 9586 /*9587 * Flush volatile registers as we make the call.9588 */9589 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);9590 9591 return off;9592 }9593 9594 9595 9596 /*********************************************************************************************************************************9597 * TLB Lookup. *9598 *********************************************************************************************************************************/9599 9600 /**9601 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.9602 */9603 DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)9604 {9605 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);9606 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);9607 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;9608 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));9609 9610 /* Do the lookup manually. */9611 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;9612 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);9613 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);9614 if (RT_LIKELY(pTlbe->uTag == uTag))9615 {9616 /*9617 * Check TLB page table level access flags.9618 */9619 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);9620 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;9621 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 09622 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;9623 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR39624 | IEMTLBE_F_PG_UNASSIGNED9625 | IEMTLBE_F_PT_NO_ACCESSED9626 | fNoWriteNoDirty | fNoUser);9627 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;9628 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))9629 {9630 /*9631 * Return the address.9632 */9633 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];9634 if ((uintptr_t)pbAddr == uResult)9635 return;9636 RT_NOREF(cbMem);9637 AssertFailed();9638 }9639 else9640 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",9641 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));9642 }9643 else9644 AssertFailed();9645 RT_BREAKPOINT();9646 }9647 9648 /* The rest of the code is in IEMN8veRecompilerTlbLookup.h. 
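The helper above re-does the lookup that the generated TLB code performs inline: flatten the address, derive a tag from the page number and the current TLB revision, index a direct-mapped entry, then require both a tag match and a clean flags/physical-revision word. A simplified model of the scheme (not the actual IEMTLB layout or flag bits):

    #include <cstdint>
    #include <cstddef>

    enum { kcTlbEntries = 256 };   /* must be a power of two */

    struct MiniTlbEntry { uint64_t uTag; uint64_t fFlagsAndPhysRev; uint8_t *pbMapping; };
    struct MiniTlb      { uint64_t uRevision; uint64_t uPhysRev; MiniTlbEntry aEntries[kcTlbEntries]; };

    /* The tag mixes the page number with the TLB revision, so bumping the
       revision invalidates every entry without touching the array. */
    static uint64_t TlbCalcTag(const MiniTlb *pTlb, uint64_t GCPtrFlat)
    {
        return (GCPtrFlat >> 12) | pTlb->uRevision;
    }

    /* fAccessMask = physical-revision bits plus the "no access of this kind"
       flag bits relevant to the current access; the masked value must equal
       the current physical revision, i.e. all forbidden bits must be clear. */
    static uint8_t *TlbLookup(MiniTlb *pTlb, uint64_t GCPtrFlat, uint64_t fAccessMask)
    {
        uint64_t const      uTag  = TlbCalcTag(pTlb, GCPtrFlat);
        MiniTlbEntry const *pTlbe = &pTlb->aEntries[uTag & (kcTlbEntries - 1)];
        if (pTlbe->uTag != uTag)
            return NULL;                                   /* miss (or stale revision) */
        if ((pTlbe->fFlagsAndPhysRev & fAccessMask) != pTlb->uPhysRev)
            return NULL;                                   /* access not allowed or phys rev changed */
        return pTlbe->pbMapping + (GCPtrFlat & 0xfff);     /* add the page offset */
    }

Bumping uRevision (or uPhysRev) invalidates every entry at once, because no stored tag or flags word can match the new value.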
*/9649 9650 9651 9652 /*********************************************************************************************************************************9653 * Recompiler Core. *9654 *********************************************************************************************************************************/9655 9656 /** @callback_method_impl{FNDISREADBYTES, Dummy.} */9657 static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)9658 {9659 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);9660 pDis->cbCachedInstr += cbMaxRead;9661 RT_NOREF(cbMinRead);9662 return VERR_NO_DATA;9663 }9664 9665 9666 DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)9667 {9668 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =9669 {9670 #define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */9671 ENTRY(fLocalForcedActions),9672 ENTRY(iem.s.rcPassUp),9673 ENTRY(iem.s.fExec),9674 ENTRY(iem.s.pbInstrBuf),9675 ENTRY(iem.s.uInstrBufPc),9676 ENTRY(iem.s.GCPhysInstrBuf),9677 ENTRY(iem.s.cbInstrBufTotal),9678 ENTRY(iem.s.idxTbCurInstr),9679 #ifdef VBOX_WITH_STATISTICS9680 ENTRY(iem.s.StatNativeTlbHitsForFetch),9681 ENTRY(iem.s.StatNativeTlbHitsForStore),9682 ENTRY(iem.s.StatNativeTlbHitsForStack),9683 ENTRY(iem.s.StatNativeTlbHitsForMapped),9684 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),9685 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),9686 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),9687 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),9688 #endif9689 ENTRY(iem.s.DataTlb.aEntries),9690 ENTRY(iem.s.DataTlb.uTlbRevision),9691 ENTRY(iem.s.DataTlb.uTlbPhysRev),9692 ENTRY(iem.s.DataTlb.cTlbHits),9693 ENTRY(iem.s.CodeTlb.aEntries),9694 ENTRY(iem.s.CodeTlb.uTlbRevision),9695 ENTRY(iem.s.CodeTlb.uTlbPhysRev),9696 ENTRY(iem.s.CodeTlb.cTlbHits),9697 ENTRY(pVMR3),9698 ENTRY(cpum.GstCtx.rax),9699 ENTRY(cpum.GstCtx.ah),9700 ENTRY(cpum.GstCtx.rcx),9701 ENTRY(cpum.GstCtx.ch),9702 ENTRY(cpum.GstCtx.rdx),9703 ENTRY(cpum.GstCtx.dh),9704 ENTRY(cpum.GstCtx.rbx),9705 ENTRY(cpum.GstCtx.bh),9706 ENTRY(cpum.GstCtx.rsp),9707 ENTRY(cpum.GstCtx.rbp),9708 ENTRY(cpum.GstCtx.rsi),9709 ENTRY(cpum.GstCtx.rdi),9710 ENTRY(cpum.GstCtx.r8),9711 ENTRY(cpum.GstCtx.r9),9712 ENTRY(cpum.GstCtx.r10),9713 ENTRY(cpum.GstCtx.r11),9714 ENTRY(cpum.GstCtx.r12),9715 ENTRY(cpum.GstCtx.r13),9716 ENTRY(cpum.GstCtx.r14),9717 ENTRY(cpum.GstCtx.r15),9718 ENTRY(cpum.GstCtx.es.Sel),9719 ENTRY(cpum.GstCtx.es.u64Base),9720 ENTRY(cpum.GstCtx.es.u32Limit),9721 ENTRY(cpum.GstCtx.es.Attr),9722 ENTRY(cpum.GstCtx.cs.Sel),9723 ENTRY(cpum.GstCtx.cs.u64Base),9724 ENTRY(cpum.GstCtx.cs.u32Limit),9725 ENTRY(cpum.GstCtx.cs.Attr),9726 ENTRY(cpum.GstCtx.ss.Sel),9727 ENTRY(cpum.GstCtx.ss.u64Base),9728 ENTRY(cpum.GstCtx.ss.u32Limit),9729 ENTRY(cpum.GstCtx.ss.Attr),9730 ENTRY(cpum.GstCtx.ds.Sel),9731 ENTRY(cpum.GstCtx.ds.u64Base),9732 ENTRY(cpum.GstCtx.ds.u32Limit),9733 ENTRY(cpum.GstCtx.ds.Attr),9734 ENTRY(cpum.GstCtx.fs.Sel),9735 ENTRY(cpum.GstCtx.fs.u64Base),9736 ENTRY(cpum.GstCtx.fs.u32Limit),9737 ENTRY(cpum.GstCtx.fs.Attr),9738 ENTRY(cpum.GstCtx.gs.Sel),9739 ENTRY(cpum.GstCtx.gs.u64Base),9740 ENTRY(cpum.GstCtx.gs.u32Limit),9741 ENTRY(cpum.GstCtx.gs.Attr),9742 ENTRY(cpum.GstCtx.rip),9743 ENTRY(cpum.GstCtx.eflags),9744 ENTRY(cpum.GstCtx.uRipInhibitInt),9745 ENTRY(cpum.GstCtx.cr0),9746 ENTRY(cpum.GstCtx.cr4),9747 ENTRY(cpum.GstCtx.aXcr[0]),9748 ENTRY(cpum.GstCtx.aXcr[1]),9749 #ifdef 
IEMNATIVE_WITH_SIMD_REG_ALLOCATOR9750 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),9751 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),9752 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),9753 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),9754 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),9755 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),9756 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),9757 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),9758 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),9759 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),9760 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),9761 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),9762 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),9763 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),9764 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),9765 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),9766 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),9767 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),9768 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),9769 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),9770 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),9771 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),9772 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),9773 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),9774 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),9775 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),9776 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),9777 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),9778 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),9779 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),9780 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),9781 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])9782 #endif9783 #undef ENTRY9784 };9785 #ifdef VBOX_STRICT9786 static bool s_fOrderChecked = false;9787 if (!s_fOrderChecked)9788 {9789 s_fOrderChecked = true;9790 uint32_t offPrev = s_aMembers[0].off;9791 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)9792 {9793 Assert(s_aMembers[i].off > offPrev);9794 offPrev = s_aMembers[i].off;9795 }9796 }9797 #endif9798 9799 /*9800 * Binary lookup.9801 */9802 unsigned iStart = 0;9803 unsigned iEnd = RT_ELEMENTS(s_aMembers);9804 for (;;)9805 {9806 unsigned const iCur = iStart + (iEnd - iStart) / 2;9807 uint32_t const offCur = s_aMembers[iCur].off;9808 if (off < offCur)9809 {9810 if (iCur != iStart)9811 iEnd = iCur;9812 else9813 break;9814 }9815 else if (off > offCur)9816 {9817 if (iCur + 1 < iEnd)9818 iStart = iCur + 1;9819 else9820 break;9821 }9822 else9823 return s_aMembers[iCur].pszName;9824 }9825 #ifdef VBOX_WITH_STATISTICS9826 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))9827 return "iem.s.acThreadedFuncStats[iFn]";9828 #endif9829 return NULL;9830 }9831 9832 9833 DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT9834 {9835 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);9836 #if defined(RT_ARCH_AMD64)9837 static const char * const a_apszMarkers[] =9838 {9839 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",9840 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"9841 };9842 #endif9843 9844 char szDisBuf[512];9845 DISSTATE Dis;9846 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;9847 uint32_t const cNative = pTb->Native.cInstructions;9848 uint32_t offNative = 0;9849 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO9850 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;9851 #endif9852 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT9853 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? 
DISCPUMODE_32BIT9854 : DISCPUMODE_64BIT;9855 #if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)9856 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;9857 #elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)9858 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;9859 #elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)9860 # error "Port me"9861 #else9862 csh hDisasm = ~(size_t)0;9863 # if defined(RT_ARCH_AMD64)9864 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);9865 # elif defined(RT_ARCH_ARM64)9866 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);9867 # else9868 # error "Port me"9869 # endif9870 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));9871 9872 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.9873 //Assert(rcCs == CS_ERR_OK);9874 #endif9875 9876 /*9877 * Print TB info.9878 */9879 pHlp->pfnPrintf(pHlp,9880 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"9881 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",9882 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,9883 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));9884 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO9885 if (pDbgInfo && pDbgInfo->cEntries > 1)9886 {9887 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);9888 9889 /*9890 * This disassembly is driven by the debug info which follows the native9891 * code and indicates when it starts with the next guest instructions,9892 * where labels are and such things.9893 */9894 uint32_t idxThreadedCall = 0;9895 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);9896 uint8_t idxRange = UINT8_MAX;9897 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));9898 uint32_t offRange = 0;9899 uint32_t offOpcodes = 0;9900 uint32_t const cbOpcodes = pTb->cbOpcodes;9901 RTGCPHYS GCPhysPc = pTb->GCPhysPc;9902 uint32_t const cDbgEntries = pDbgInfo->cEntries;9903 uint32_t iDbgEntry = 1;9904 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;9905 9906 while (offNative < cNative)9907 {9908 /* If we're at or have passed the point where the next chunk of debug9909 info starts, process it. */9910 if (offDbgNativeNext <= offNative)9911 {9912 offDbgNativeNext = UINT32_MAX;9913 for (; iDbgEntry < cDbgEntries; iDbgEntry++)9914 {9915 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)9916 {9917 case kIemTbDbgEntryType_GuestInstruction:9918 {9919 /* Did the exec flag change? */9920 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)9921 {9922 pHlp->pfnPrintf(pHlp,9923 " fExec change %#08x -> %#08x %s\n",9924 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,9925 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,9926 szDisBuf, sizeof(szDisBuf)));9927 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;9928 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT9929 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT9930 : DISCPUMODE_64BIT;9931 }9932 9933 /* New opcode range? We need to fend up a spurious debug info entry here for cases9934 where the compilation was aborted before the opcode was recorded and the actual9935 instruction was translated to a threaded call. 
This may happen when we run out9936 of ranges, or when some complicated interrupts/FFs are found to be pending or9937 similar. So, we just deal with it here rather than in the compiler code as it9938 is a lot simpler to do here. */9939 if ( idxRange == UINT8_MAX9940 || idxRange >= cRanges9941 || offRange >= pTb->aRanges[idxRange].cbOpcodes)9942 {9943 idxRange += 1;9944 if (idxRange < cRanges)9945 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;9946 else9947 continue;9948 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);9949 GCPhysPc = pTb->aRanges[idxRange].offPhysPage9950 + (pTb->aRanges[idxRange].idxPhysPage == 09951 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK9952 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);9953 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",9954 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,9955 pTb->aRanges[idxRange].idxPhysPage);9956 GCPhysPc += offRange;9957 }9958 9959 /* Disassemble the instruction. */9960 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);9961 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);9962 uint32_t cbInstr = 1;9963 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,9964 &pTb->pabOpcodes[offOpcodes], cbInstrMax,9965 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);9966 if (RT_SUCCESS(rc))9967 {9968 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),9969 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT9970 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,9971 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);9972 9973 static unsigned const s_offMarker = 55;9974 static char const s_szMarker[] = " ; <--- guest";9975 if (cch < s_offMarker)9976 {9977 memset(&szDisBuf[cch], ' ', s_offMarker - cch);9978 cch = s_offMarker;9979 }9980 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))9981 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));9982 9983 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);9984 }9985 else9986 {9987 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",9988 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);9989 cbInstr = 1;9990 }9991 GCPhysPc += cbInstr;9992 offOpcodes += cbInstr;9993 offRange += cbInstr;9994 continue;9995 }9996 9997 case kIemTbDbgEntryType_ThreadedCall:9998 pHlp->pfnPrintf(pHlp,9999 " Call #%u to %s (%u args) - %s\n",10000 idxThreadedCall,10001 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],10002 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],10003 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? 
"recompiled" : "todo");10004 idxThreadedCall++;10005 continue;10006 10007 case kIemTbDbgEntryType_GuestRegShadowing:10008 {10009 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];10010 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;10011 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)10012 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,10013 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);10014 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)10015 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,10016 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);10017 else10018 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,10019 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],10020 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);10021 continue;10022 }10023 10024 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR10025 case kIemTbDbgEntryType_GuestSimdRegShadowing:10026 {10027 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];10028 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;10029 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)10030 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,10031 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);10032 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)10033 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,10034 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);10035 else10036 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,10037 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],10038 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);10039 continue;10040 }10041 #endif10042 10043 case kIemTbDbgEntryType_Label:10044 {10045 const char *pszName = "what_the_fudge";10046 const char *pszComment = "";10047 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;10048 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)10049 {10050 case kIemNativeLabelType_Return: pszName = "Return"; break;10051 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;10052 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;10053 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;10054 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;10055 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;10056 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;10057 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;10058 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;10059 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;10060 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;10061 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;10062 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;10063 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;10064 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;10065 case 
kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;10066 case kIemNativeLabelType_If:10067 pszName = "If";10068 fNumbered = true;10069 break;10070 case kIemNativeLabelType_Else:10071 pszName = "Else";10072 fNumbered = true;10073 pszComment = " ; regs state restored pre-if-block";10074 break;10075 case kIemNativeLabelType_Endif:10076 pszName = "Endif";10077 fNumbered = true;10078 break;10079 case kIemNativeLabelType_CheckIrq:10080 pszName = "CheckIrq_CheckVM";10081 fNumbered = true;10082 break;10083 case kIemNativeLabelType_TlbLookup:10084 pszName = "TlbLookup";10085 fNumbered = true;10086 break;10087 case kIemNativeLabelType_TlbMiss:10088 pszName = "TlbMiss";10089 fNumbered = true;10090 break;10091 case kIemNativeLabelType_TlbDone:10092 pszName = "TlbDone";10093 fNumbered = true;10094 break;10095 case kIemNativeLabelType_Invalid:10096 case kIemNativeLabelType_End:10097 break;10098 }10099 if (fNumbered)10100 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);10101 else10102 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);10103 continue;10104 }10105 10106 case kIemTbDbgEntryType_NativeOffset:10107 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;10108 Assert(offDbgNativeNext >= offNative);10109 break;10110 10111 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING10112 case kIemTbDbgEntryType_DelayedPcUpdate:10113 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",10114 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,10115 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);10116 continue;10117 #endif10118 10119 #ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK10120 case kIemTbDbgEntryType_GuestRegDirty:10121 {10122 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];10123 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg10124 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName10125 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;10126 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg10127 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]10128 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];10129 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",10130 pszGstReg, pszHstReg);10131 continue;10132 }10133 10134 case kIemTbDbgEntryType_GuestRegWriteback:10135 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX32)\n",10136 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? 
"SIMD" : "general",10137 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg);10138 continue;10139 #endif10140 10141 default:10142 AssertFailed();10143 }10144 iDbgEntry++;10145 break;10146 }10147 }10148 10149 /*10150 * Disassemble the next native instruction.10151 */10152 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];10153 # ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER10154 uint32_t cbInstr = sizeof(paNative[0]);10155 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);10156 if (RT_SUCCESS(rc))10157 {10158 # if defined(RT_ARCH_AMD64)10159 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */10160 {10161 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];10162 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)10163 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",10164 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],10165 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],10166 uInfo & 0x8000 ? "recompiled" : "todo");10167 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))10168 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);10169 else10170 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);10171 }10172 else10173 # endif10174 {10175 const char *pszAnnotation = NULL;10176 # ifdef RT_ARCH_AMD6410177 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),10178 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT10179 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,10180 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);10181 PCDISOPPARAM pMemOp;10182 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))10183 pMemOp = &Dis.Param1;10184 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))10185 pMemOp = &Dis.Param2;10186 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))10187 pMemOp = &Dis.Param3;10188 else10189 pMemOp = NULL;10190 if ( pMemOp10191 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU10192 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))10193 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT3210194 ? 
pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);10195 10196 #elif defined(RT_ARCH_ARM64)10197 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),10198 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,10199 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);10200 # else10201 # error "Port me"10202 # endif10203 if (pszAnnotation)10204 {10205 static unsigned const s_offAnnotation = 55;10206 size_t const cchAnnotation = strlen(pszAnnotation);10207 size_t cchDis = strlen(szDisBuf);10208 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))10209 {10210 if (cchDis < s_offAnnotation)10211 {10212 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);10213 cchDis = s_offAnnotation;10214 }10215 szDisBuf[cchDis++] = ' ';10216 szDisBuf[cchDis++] = ';';10217 szDisBuf[cchDis++] = ' ';10218 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);10219 }10220 }10221 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);10222 }10223 }10224 else10225 {10226 # if defined(RT_ARCH_AMD64)10227 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",10228 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);10229 # elif defined(RT_ARCH_ARM64)10230 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);10231 # else10232 # error "Port me"10233 # endif10234 cbInstr = sizeof(paNative[0]);10235 }10236 offNative += cbInstr / sizeof(paNative[0]);10237 10238 # else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */10239 cs_insn *pInstr;10240 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),10241 (uintptr_t)pNativeCur, 1, &pInstr);10242 if (cInstrs > 0)10243 {10244 Assert(cInstrs == 1);10245 const char *pszAnnotation = NULL;10246 # if defined(RT_ARCH_ARM64)10247 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)10248 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))10249 {10250 /* This is bit crappy, but the disassembler provides incomplete addressing details. */10251 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);10252 char *psz = strchr(pInstr->op_str, '[');10253 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))10254 {10255 uint32_t const offVCpu = psz[3] == '8'? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);10256 int32_t off = -1;10257 psz += 4;10258 if (*psz == ']')10259 off = 0;10260 else if (*psz == ',')10261 {10262 psz = RTStrStripL(psz + 1);10263 if (*psz == '#')10264 off = RTStrToInt32(&psz[1]);10265 /** @todo deal with index registers and LSL as well... */10266 }10267 if (off >= 0)10268 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);10269 }10270 }10271 # endif10272 10273 size_t const cchOp = strlen(pInstr->op_str);10274 # if defined(RT_ARCH_AMD64)10275 if (pszAnnotation)10276 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",10277 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,10278 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);10279 else10280 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",10281 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);10282 10283 # else10284 if (pszAnnotation)10285 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",10286 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,10287 cchOp < 55 ? 
55 - cchOp : 0, "", pszAnnotation);10288 else10289 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",10290 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);10291 # endif10292 offNative += pInstr->size / sizeof(*pNativeCur);10293 cs_free(pInstr, cInstrs);10294 }10295 else10296 {10297 # if defined(RT_ARCH_AMD64)10298 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",10299 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm)));10300 # else10301 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));10302 # endif10303 offNative++;10304 }10305 # endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */10306 }10307 }10308 else10309 #endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */10310 {10311 /*10312 * No debug info, just disassemble the x86 code and then the native code.10313 *10314 * First the guest code:10315 */10316 for (unsigned i = 0; i < pTb->cRanges; i++)10317 {10318 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage10319 + (pTb->aRanges[i].idxPhysPage == 010320 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK10321 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);10322 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",10323 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);10324 unsigned off = pTb->aRanges[i].offOpcodes;10325 /** @todo this ain't working when crossing pages! */10326 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;10327 while (off < cbOpcodes)10328 {10329 uint32_t cbInstr = 1;10330 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,10331 &pTb->pabOpcodes[off], cbOpcodes - off,10332 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);10333 if (RT_SUCCESS(rc))10334 {10335 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),10336 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT10337 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,10338 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);10339 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);10340 GCPhysPc += cbInstr;10341 off += cbInstr;10342 }10343 else10344 {10345 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",10346 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);10347 break;10348 }10349 }10350 }10351 10352 /*10353 * Then the native code:10354 */10355 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);10356 while (offNative < cNative)10357 {10358 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];10359 # ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER10360 uint32_t cbInstr = sizeof(paNative[0]);10361 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);10362 if (RT_SUCCESS(rc))10363 {10364 # if defined(RT_ARCH_AMD64)10365 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */10366 {10367 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];10368 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)10369 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",10370 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],10371 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],10372 uInfo & 0x8000 ? 
"recompiled" : "todo");10373 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))10374 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);10375 else10376 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);10377 }10378 else10379 # endif10380 {10381 # ifdef RT_ARCH_AMD6410382 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),10383 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT10384 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,10385 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);10386 # elif defined(RT_ARCH_ARM64)10387 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),10388 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,10389 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);10390 # else10391 # error "Port me"10392 # endif10393 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);10394 }10395 }10396 else10397 {10398 # if defined(RT_ARCH_AMD64)10399 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",10400 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);10401 # else10402 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);10403 # endif10404 cbInstr = sizeof(paNative[0]);10405 }10406 offNative += cbInstr / sizeof(paNative[0]);10407 10408 # else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */10409 cs_insn *pInstr;10410 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),10411 (uintptr_t)pNativeCur, 1, &pInstr);10412 if (cInstrs > 0)10413 {10414 Assert(cInstrs == 1);10415 # if defined(RT_ARCH_AMD64)10416 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",10417 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);10418 # else10419 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",10420 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);10421 # endif10422 offNative += pInstr->size / sizeof(*pNativeCur);10423 cs_free(pInstr, cInstrs);10424 }10425 else10426 {10427 # if defined(RT_ARCH_AMD64)10428 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",10429 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm)));10430 # else10431 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));10432 # endif10433 offNative++;10434 }10435 # endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */10436 }10437 }10438 10439 #ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER10440 /* Cleanup. 
*/10441 cs_close(&hDisasm);10442 #endif10443 }10444 10445 10446 /**10447 * Recompiles the given threaded TB into a native one.10448 *10449 * In case of failure the translation block will be returned as-is.10450 *10451 * @returns pTb.10452 * @param pVCpu The cross context virtual CPU structure of the calling10453 * thread.10454 * @param pTb The threaded translation to recompile to native.10455 */10456 DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT10457 {10458 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);10459 10460 /*10461 * The first time thru, we allocate the recompiler state, the other times10462 * we just need to reset it before using it again.10463 */10464 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;10465 if (RT_LIKELY(pReNative))10466 iemNativeReInit(pReNative, pTb);10467 else10468 {10469 pReNative = iemNativeInit(pVCpu, pTb);10470 AssertReturn(pReNative, pTb);10471 }10472 10473 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS10474 /*10475 * First do liveness analysis. This is done backwards.10476 */10477 {10478 uint32_t idxCall = pTb->Thrd.cCalls;10479 if (idxCall <= pReNative->cLivenessEntriesAlloc)10480 { /* likely */ }10481 else10482 {10483 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);10484 while (idxCall > cAlloc)10485 cAlloc *= 2;10486 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);10487 AssertReturn(pvNew, pTb);10488 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;10489 pReNative->cLivenessEntriesAlloc = cAlloc;10490 }10491 AssertReturn(idxCall > 0, pTb);10492 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;10493 10494 /* The initial (final) entry. */10495 idxCall--;10496 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);10497 10498 /* Loop backwards thru the calls and fill in the other entries. */10499 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];10500 while (idxCall > 0)10501 {10502 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];10503 if (pfnLiveness)10504 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);10505 else10506 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);10507 pCallEntry--;10508 idxCall--;10509 }10510 10511 # ifdef VBOX_WITH_STATISTICS10512 /* Check if there are any EFLAGS optimization to be had here. This requires someone settings them10513 to 'clobbered' rather that 'input'. 
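The liveness pass above runs strictly backwards: the entry following the last threaded call starts out as "nothing used", and each pfnLiveness callback (or the exception/call fallback) derives the entry for the preceding call from the one after it. A minimal standalone sketch of that direction of data flow, using made-up single-mask records (CALLENTRY, LivenessBackwards and the fUses/fDefs fields are inventions for this illustration, not the VBox structures):

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical call record: which guest registers it reads and clobbers. */
    typedef struct CALLENTRY
    {
        uint32_t fUses;   /* registers read as input */
        uint32_t fDefs;   /* registers overwritten before being read */
    } CALLENTRY;

    /* Backwards liveness: paLive[i] = registers still needed just before call i,
       paLive[cCalls] = what is needed after the last call (nothing here). */
    static void LivenessBackwards(const CALLENTRY *paCalls, uint32_t cCalls, uint32_t *paLive)
    {
        paLive[cCalls] = 0;
        for (uint32_t i = cCalls; i > 0; i--)
            paLive[i - 1] = (paLive[i] & ~paCalls[i - 1].fDefs) | paCalls[i - 1].fUses;
    }

    int main(void)
    {
        /* call 0 writes reg0; call 1 reads reg0 and writes reg1; call 2 reads reg1. */
        CALLENTRY const aCalls[3] = { { 0x0, 0x1 }, { 0x1, 0x2 }, { 0x2, 0x0 } };
        uint32_t aLive[4];
        LivenessBackwards(aCalls, 3, aLive);
        for (unsigned i = 0; i < 3; i++)
            printf("live before call %u: %#x\n", i, aLive[i]);
        return 0;
    }

The real liveness entries track richer per-register states (unused, input, clobbered, and so on) rather than a single bit, but the derivation order is the same.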
*/10514 /** @todo */10515 # endif10516 }10517 #endif10518 10519 /*10520 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp10521 * for aborting if an error happens.10522 */10523 uint32_t cCallsLeft = pTb->Thrd.cCalls;10524 #ifdef LOG_ENABLED10525 uint32_t const cCallsOrg = cCallsLeft;10526 #endif10527 uint32_t off = 0;10528 int rc = VINF_SUCCESS;10529 IEMNATIVE_TRY_SETJMP(pReNative, rc)10530 {10531 /*10532 * Emit prolog code (fixed).10533 */10534 off = iemNativeEmitProlog(pReNative, off);10535 10536 /*10537 * Convert the calls to native code.10538 */10539 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO10540 int32_t iGstInstr = -1;10541 #endif10542 #ifndef VBOX_WITHOUT_RELEASE_STATISTICS10543 uint32_t cThreadedCalls = 0;10544 uint32_t cRecompiledCalls = 0;10545 #endif10546 #if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)10547 uint32_t idxCurCall = 0;10548 #endif10549 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;10550 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;10551 while (cCallsLeft-- > 0)10552 {10553 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];10554 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS10555 pReNative->idxCurCall = idxCurCall;10556 #endif10557 10558 /*10559 * Debug info, assembly markup and statistics.10560 */10561 #if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)10562 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)10563 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;10564 #endif10565 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO10566 iemNativeDbgInfoAddNativeOffset(pReNative, off);10567 if (iGstInstr < (int32_t)pCallEntry->idxInstr)10568 {10569 if (iGstInstr < (int32_t)pTb->cInstructions)10570 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);10571 else10572 Assert(iGstInstr == pTb->cInstructions);10573 iGstInstr = pCallEntry->idxInstr;10574 }10575 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);10576 #endif10577 #if defined(VBOX_STRICT)10578 off = iemNativeEmitMarker(pReNative, off,10579 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));10580 #endif10581 #if defined(VBOX_STRICT)10582 iemNativeRegAssertSanity(pReNative);10583 #endif10584 #ifdef VBOX_WITH_STATISTICS10585 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);10586 #endif10587 10588 /*10589 * Actual work.10590 */10591 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],10592 pfnRecom ? "(recompiled)" : "(todo)"));10593 if (pfnRecom) /** @todo stats on this. 
*/10594 {10595 off = pfnRecom(pReNative, off, pCallEntry);10596 STAM_REL_STATS({cRecompiledCalls++;});10597 }10598 else10599 {10600 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);10601 STAM_REL_STATS({cThreadedCalls++;});10602 }10603 Assert(off <= pReNative->cInstrBufAlloc);10604 Assert(pReNative->cCondDepth == 0);10605 10606 #if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)10607 if (LogIs2Enabled())10608 {10609 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];10610 # ifndef IEMLIVENESS_EXTENDED_LAYOUT10611 static const char s_achState[] = "CUXI";10612 # else10613 static const char s_achState[] = "UxRrWwMmCcQqKkNn";10614 # endif10615 10616 char szGpr[17];10617 for (unsigned i = 0; i < 16; i++)10618 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];10619 szGpr[16] = '\0';10620 10621 char szSegBase[X86_SREG_COUNT + 1];10622 char szSegLimit[X86_SREG_COUNT + 1];10623 char szSegAttrib[X86_SREG_COUNT + 1];10624 char szSegSel[X86_SREG_COUNT + 1];10625 for (unsigned i = 0; i < X86_SREG_COUNT; i++)10626 {10627 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];10628 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];10629 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];10630 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];10631 }10632 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]10633 = szSegSel[X86_SREG_COUNT] = '\0';10634 10635 char szEFlags[8];10636 for (unsigned i = 0; i < 7; i++)10637 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];10638 szEFlags[7] = '\0';10639 10640 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",10641 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));10642 }10643 #endif10644 10645 /*10646 * Advance.10647 */10648 pCallEntry++;10649 #if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)10650 idxCurCall++;10651 #endif10652 }10653 10654 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);10655 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);10656 if (!cThreadedCalls)10657 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);10658 10659 /*10660 * Emit the epilog code.10661 */10662 uint32_t idxReturnLabel;10663 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);10664 10665 /*10666 * Generate special jump labels.10667 */10668 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))10669 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);10670 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))10671 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);10672 10673 /*10674 * Generate simple TB tail labels that just calls a help with a pVCpu10675 * arg and either return or longjmps/throws a non-zero status.10676 *10677 * The array entries must be ordered by enmLabel value so we can index10678 * using fTailLabels bit numbers.10679 */10680 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));10681 static struct10682 {10683 IEMNATIVELABELTYPE 
enmLabel;10684 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;10685 } const g_aSimpleTailLabels[] =10686 {10687 { kIemNativeLabelType_Invalid, NULL },10688 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },10689 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },10690 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },10691 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },10692 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },10693 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },10694 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },10695 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },10696 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },10697 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },10698 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },10699 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },10700 };10701 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);10702 AssertCompile(kIemNativeLabelType_Invalid == 0);10703 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);10704 if (fTailLabels)10705 {10706 do10707 {10708 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);10709 fTailLabels &= ~RT_BIT_64(enmLabel);10710 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);10711 10712 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);10713 Assert(idxLabel != UINT32_MAX);10714 if (idxLabel != UINT32_MAX)10715 {10716 iemNativeLabelDefine(pReNative, idxLabel, off);10717 10718 /* int pfnCallback(PVMCPUCC pVCpu) */10719 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);10720 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);10721 10722 /* jump back to the return sequence. 
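The fTailLabels loop around this table peels one set bit at a time: find the lowest requested label type, clear its bit, emit the stub for it. A standalone sketch of that bit-scan pattern with a portable stand-in for ASMBitFirstSetU64 (BitFirstSetU64 and the bit assignments below are made up for the example):

    #include <stdint.h>
    #include <stdio.h>

    /* Portable stand-in for ASMBitFirstSetU64(): 1-based index of the lowest
       set bit, or 0 if the value is zero. */
    static unsigned BitFirstSetU64(uint64_t fMask)
    {
        if (!fMask)
            return 0;
        unsigned iBit = 1;
        while (!(fMask & 1))
        {
            fMask >>= 1;
            iBit++;
        }
        return iBit;
    }

    int main(void)
    {
        /* Pretend bits 3, 5 and 9 correspond to label types that were requested. */
        uint64_t fTailLabels = (UINT64_C(1) << 3) | (UINT64_C(1) << 5) | (UINT64_C(1) << 9);
        while (fTailLabels)
        {
            unsigned const enmLabel = BitFirstSetU64(fTailLabels) - 1;
            fTailLabels &= ~(UINT64_C(1) << enmLabel);
            printf("emit tail stub for label type %u\n", enmLabel);
        }
        return 0;
    }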
*/10723 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);10724 }10725 10726 } while (fTailLabels);10727 }10728 }10729 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);10730 {10731 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));10732 return pTb;10733 }10734 IEMNATIVE_CATCH_LONGJMP_END(pReNative);10735 Assert(off <= pReNative->cInstrBufAlloc);10736 10737 /*10738 * Make sure all labels has been defined.10739 */10740 PIEMNATIVELABEL const paLabels = pReNative->paLabels;10741 #ifdef VBOX_STRICT10742 uint32_t const cLabels = pReNative->cLabels;10743 for (uint32_t i = 0; i < cLabels; i++)10744 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);10745 #endif10746 10747 /*10748 * Allocate executable memory, copy over the code we've generated.10749 */10750 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;10751 if (pTbAllocator->pDelayedFreeHead)10752 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);10753 10754 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb);10755 AssertReturn(paFinalInstrBuf, pTb);10756 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));10757 10758 /*10759 * Apply fixups.10760 */10761 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;10762 uint32_t const cFixups = pReNative->cFixups;10763 for (uint32_t i = 0; i < cFixups; i++)10764 {10765 Assert(paFixups[i].off < off);10766 Assert(paFixups[i].idxLabel < cLabels);10767 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,10768 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,10769 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));10770 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };10771 switch (paFixups[i].enmType)10772 {10773 #if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)10774 case kIemNativeFixupType_Rel32:10775 Assert(paFixups[i].off + 4 <= off);10776 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;10777 continue;10778 10779 #elif defined(RT_ARCH_ARM64)10780 case kIemNativeFixupType_RelImm26At0:10781 {10782 Assert(paFixups[i].off < off);10783 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;10784 Assert(offDisp >= -262144 && offDisp < 262144);10785 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));10786 continue;10787 }10788 10789 case kIemNativeFixupType_RelImm19At5:10790 {10791 Assert(paFixups[i].off < off);10792 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;10793 Assert(offDisp >= -262144 && offDisp < 262144);10794 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);10795 continue;10796 }10797 10798 case kIemNativeFixupType_RelImm14At5:10799 {10800 Assert(paFixups[i].off < off);10801 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;10802 Assert(offDisp >= -8192 && offDisp < 8192);10803 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);10804 continue;10805 }10806 10807 #endif10808 case kIemNativeFixupType_Invalid:10809 case kIemNativeFixupType_End:10810 break;10811 }10812 AssertFailed();10813 }10814 10815 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));10816 
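For the AArch64 fixup cases applied above, the displacement is counted in 32-bit instruction words and merged into an already emitted branch instruction word. A standalone sketch of that patching (plain C, not the recompiler's emitters; PatchRelImm26At0/PatchRelImm19At5 are invented names, and the asserts use the architectural field widths, which are wider than the conservative ranges asserted in the fixup loop above):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* B/BL: 26-bit signed word displacement in bits [25:0]. */
    static uint32_t PatchRelImm26At0(uint32_t uInstr, int32_t offDisp)
    {
        assert(offDisp >= -(1 << 25) && offDisp < (1 << 25));
        return (uInstr & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
    }

    /* B.cond/CBZ/CBNZ: 19-bit signed word displacement in bits [23:5]. */
    static uint32_t PatchRelImm19At5(uint32_t uInstr, int32_t offDisp)
    {
        assert(offDisp >= -(1 << 18) && offDisp < (1 << 18));
        return (uInstr & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
    }

    int main(void)
    {
        printf("%#010x\n", PatchRelImm26At0(UINT32_C(0x14000000), 16));   /* b   +16 insns -> 0x14000010 */
        printf("%#010x\n", PatchRelImm19At5(UINT32_C(0x54000000), -4));   /* b.eq -4 insns -> 0x54ffff80 */
        return 0;
    }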
STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));10817 10818 /*10819 * Convert the translation block.10820 */10821 RTMemFree(pTb->Thrd.paCalls);10822 pTb->Native.paInstructions = paFinalInstrBuf;10823 pTb->Native.cInstructions = off;10824 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;10825 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO10826 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so not return check. */10827 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));10828 #endif10829 10830 Assert(pTbAllocator->cThreadedTbs > 0);10831 pTbAllocator->cThreadedTbs -= 1;10832 pTbAllocator->cNativeTbs += 1;10833 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);10834 10835 #ifdef LOG_ENABLED10836 /*10837 * Disassemble to the log if enabled.10838 */10839 if (LogIs3Enabled())10840 {10841 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));10842 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());10843 # if defined(DEBUG_bird) || defined(DEBUG_aeichner)10844 RTLogFlush(NULL);10845 # endif10846 }10847 #endif10848 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/10849 10850 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);10851 return pTb;10852 }10853 -
trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp
r104114 r104115 67 67 #endif 68 68 69 #ifdef RT_OS_WINDOWS70 # include <iprt/formats/pecoff.h> /* this is incomaptible with windows.h, thus: */71 extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);72 extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);73 #else74 # include <iprt/formats/dwarf.h>75 # if defined(RT_OS_DARWIN)76 # include <libkern/OSCacheControl.h>77 # define IEMNATIVE_USE_LIBUNWIND78 extern "C" void __register_frame(const void *pvFde);79 extern "C" void __deregister_frame(const void *pvFde);80 # else81 # ifdef DEBUG_bird /** @todo not thread safe yet */82 # define IEMNATIVE_USE_GDB_JIT83 # endif84 # ifdef IEMNATIVE_USE_GDB_JIT85 # include <iprt/critsect.h>86 # include <iprt/once.h>87 # include <iprt/formats/elf64.h>88 # endif89 extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */90 extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */91 # endif92 #endif93 69 #ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER 94 70 # include "/opt/local/include/capstone/capstone.h" … … 137 113 DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar); 138 114 139 140 /*********************************************************************************************************************************141 * Executable Memory Allocator *142 *********************************************************************************************************************************/143 /** The chunk sub-allocation unit size in bytes. */144 #define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128145 /** The chunk sub-allocation unit size as a shift factor. */146 #define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7147 /** Enables adding a header to the sub-allocator allocations.148 * This is useful for freeing up executable memory among other things. */149 #define IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER150 /** Use alternative pruning. */151 #define IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING152 153 154 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)155 # ifdef IEMNATIVE_USE_GDB_JIT156 # define IEMNATIVE_USE_GDB_JIT_ET_DYN157 158 /** GDB JIT: Code entry. */159 typedef struct GDBJITCODEENTRY160 {161 struct GDBJITCODEENTRY *pNext;162 struct GDBJITCODEENTRY *pPrev;163 uint8_t *pbSymFile;164 uint64_t cbSymFile;165 } GDBJITCODEENTRY;166 167 /** GDB JIT: Actions. */168 typedef enum GDBJITACTIONS : uint32_t169 {170 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister171 } GDBJITACTIONS;172 173 /** GDB JIT: Descriptor. */174 typedef struct GDBJITDESCRIPTOR175 {176 uint32_t uVersion;177 GDBJITACTIONS enmAction;178 GDBJITCODEENTRY *pRelevant;179 GDBJITCODEENTRY *pHead;180 /** Our addition: */181 GDBJITCODEENTRY *pTail;182 } GDBJITDESCRIPTOR;183 184 /** GDB JIT: Our simple symbol file data. */185 typedef struct GDBJITSYMFILE186 {187 Elf64_Ehdr EHdr;188 # ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN189 Elf64_Shdr aShdrs[5];190 # else191 Elf64_Shdr aShdrs[7];192 Elf64_Phdr aPhdrs[2];193 # endif194 /** The dwarf ehframe data for the chunk. 
*/195 uint8_t abEhFrame[512];196 char szzStrTab[128];197 Elf64_Sym aSymbols[3];198 # ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN199 Elf64_Sym aDynSyms[2];200 Elf64_Dyn aDyn[6];201 # endif202 } GDBJITSYMFILE;203 204 extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;205 extern "C" DECLEXPORT(void) __jit_debug_register_code(void);206 207 /** Init once for g_IemNativeGdbJitLock. */208 static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;209 /** Init once for the critical section. */210 static RTCRITSECT g_IemNativeGdbJitLock;211 212 /** GDB reads the info here. */213 GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };214 215 /** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */216 DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)217 {218 ASMNopPause();219 }220 221 /** @callback_method_impl{FNRTONCE} */222 static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)223 {224 RT_NOREF(pvUser);225 return RTCritSectInit(&g_IemNativeGdbJitLock);226 }227 228 229 # endif /* IEMNATIVE_USE_GDB_JIT */230 231 /**232 * Per-chunk unwind info for non-windows hosts.233 */234 typedef struct IEMEXECMEMCHUNKEHFRAME235 {236 # ifdef IEMNATIVE_USE_LIBUNWIND237 /** The offset of the FDA into abEhFrame. */238 uintptr_t offFda;239 # else240 /** 'struct object' storage area. */241 uint8_t abObject[1024];242 # endif243 # ifdef IEMNATIVE_USE_GDB_JIT244 # if 0245 /** The GDB JIT 'symbol file' data. */246 GDBJITSYMFILE GdbJitSymFile;247 # endif248 /** The GDB JIT list entry. */249 GDBJITCODEENTRY GdbJitEntry;250 # endif251 /** The dwarf ehframe data for the chunk. */252 uint8_t abEhFrame[512];253 } IEMEXECMEMCHUNKEHFRAME;254 /** Pointer to per-chunk info info for non-windows hosts. */255 typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;256 #endif257 258 259 /**260 * An chunk of executable memory.261 */262 typedef struct IEMEXECMEMCHUNK263 {264 /** Number of free items in this chunk. */265 uint32_t cFreeUnits;266 /** Hint were to start searching for free space in the allocation bitmap. */267 uint32_t idxFreeHint;268 /** Pointer to the chunk. */269 void *pvChunk;270 #ifdef IN_RING3271 /**272 * Pointer to the unwind information.273 *274 * This is used during C++ throw and longjmp (windows and probably most other275 * platforms). Some debuggers (windbg) makes use of it as well.276 *277 * Windows: This is allocated from hHeap on windows because (at least for278 * AMD64) the UNWIND_INFO structure address in the279 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".280 *281 * Others: Allocated from the regular heap to avoid unnecessary executable data282 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */283 void *pvUnwindInfo;284 #elif defined(IN_RING0)285 /** Allocation handle. */286 RTR0MEMOBJ hMemObj;287 #endif288 } IEMEXECMEMCHUNK;289 /** Pointer to a memory chunk. */290 typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;291 292 293 /**294 * Executable memory allocator for the native recompiler.295 */296 typedef struct IEMEXECMEMALLOCATOR297 {298 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */299 uint32_t uMagic;300 301 /** The chunk size. */302 uint32_t cbChunk;303 /** The maximum number of chunks. */304 uint32_t cMaxChunks;305 /** The current number of chunks. */306 uint32_t cChunks;307 /** Hint where to start looking for available memory. */308 uint32_t idxChunkHint;309 /** Statistics: Current number of allocations. */310 uint32_t cAllocations;311 312 /** The total amount of memory available. 
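The __jit_debug_descriptor / __jit_debug_register_code pair above is VirtualBox's spelling of GDB's generic JIT registration contract: the debugger sets a breakpoint on the registration function and re-reads the descriptor each time it fires. A minimal standalone version for GCC/Clang, with names taken from the GDB manual's reference declarations rather than the VBox types (RegisterJitSymFile is an invented helper, and the dummy buffer stands in for a real in-memory ELF symbol file):

    #include <stdint.h>
    #include <string.h>

    typedef enum { JIT_NOACTION = 0, JIT_REGISTER_FN, JIT_UNREGISTER_FN } jit_actions_t;

    struct jit_code_entry
    {
        struct jit_code_entry *next_entry;
        struct jit_code_entry *prev_entry;
        const char            *symfile_addr;   /* in-memory ELF image describing the JITted code */
        uint64_t               symfile_size;
    };

    struct jit_descriptor
    {
        uint32_t               version;        /* must be 1 */
        uint32_t               action_flag;    /* jit_actions_t */
        struct jit_code_entry *relevant_entry;
        struct jit_code_entry *first_entry;
    };

    /* GDB places a breakpoint here and inspects the descriptor when it is hit. */
    void __attribute__((noinline)) __jit_debug_register_code(void) { __asm__ volatile(""); }

    struct jit_descriptor __jit_debug_descriptor = { 1, JIT_NOACTION, NULL, NULL };

    /* Registering one symbol file: link it in, set the action, poke the breakpoint. */
    static void RegisterJitSymFile(struct jit_code_entry *pEntry, const void *pvSymFile, uint64_t cbSymFile)
    {
        memset(pEntry, 0, sizeof(*pEntry));
        pEntry->symfile_addr = (const char *)pvSymFile;
        pEntry->symfile_size = cbSymFile;
        pEntry->next_entry   = __jit_debug_descriptor.first_entry;
        if (pEntry->next_entry)
            pEntry->next_entry->prev_entry = pEntry;
        __jit_debug_descriptor.first_entry    = pEntry;
        __jit_debug_descriptor.relevant_entry = pEntry;
        __jit_debug_descriptor.action_flag    = JIT_REGISTER_FN;
        __jit_debug_register_code();
    }

    int main(void)
    {
        static struct jit_code_entry s_Entry;
        static const char s_abDummySymFile[16];   /* a real user would put an ELF image here */
        RegisterJitSymFile(&s_Entry, s_abDummySymFile, sizeof(s_abDummySymFile));
        return 0;
    }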
*/313 uint64_t cbTotal;314 /** Total amount of free memory. */315 uint64_t cbFree;316 /** Total amount of memory allocated. */317 uint64_t cbAllocated;318 319 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).320 *321 * Since the chunk size is a power of two and the minimum chunk size is a lot322 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always323 * require a whole number of uint64_t elements in the allocation bitmap. So,324 * for sake of simplicity, they are allocated as one continous chunk for325 * simplicity/laziness. */326 uint64_t *pbmAlloc;327 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */328 uint32_t cUnitsPerChunk;329 /** Number of bitmap elements per chunk (for quickly locating the bitmap330 * portion corresponding to an chunk). */331 uint32_t cBitmapElementsPerChunk;332 333 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING334 /** The next chunk to prune in. */335 uint32_t idxChunkPrune;336 /** Where in chunk offset to start pruning at. */337 uint32_t offChunkPrune;338 /** Profiling the pruning code. */339 STAMPROFILE StatPruneProf;340 /** Number of bytes recovered by the pruning. */341 STAMPROFILE StatPruneRecovered;342 #endif343 344 #ifdef VBOX_WITH_STATISTICS345 STAMPROFILE StatAlloc;346 #endif347 348 349 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)350 /** Pointer to the array of unwind info running parallel to aChunks (same351 * allocation as this structure, located after the bitmaps).352 * (For Windows, the structures must reside in 32-bit RVA distance to the353 * actual chunk, so they are allocated off the chunk.) */354 PIEMEXECMEMCHUNKEHFRAME paEhFrames;355 #endif356 357 /** The allocation chunks. */358 RT_FLEXIBLE_ARRAY_EXTENSION359 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];360 } IEMEXECMEMALLOCATOR;361 /** Pointer to an executable memory allocator. */362 typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;363 364 /** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */365 #define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)366 367 368 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER369 /**370 * Allocation header.371 */372 typedef struct IEMEXECMEMALLOCHDR373 {374 /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */375 uint32_t uMagic;376 /** The allocation chunk (for speeding up freeing). */377 uint32_t idxChunk;378 /** Pointer to the translation block the allocation belongs to.379 * This is the whole point of the header. */380 PIEMTB pTb;381 } IEMEXECMEMALLOCHDR;382 /** Pointer to an allocation header. */383 typedef IEMEXECMEMALLOCHDR *PIEMEXECMEMALLOCHDR;384 /** Magic value for IEMEXECMEMALLOCHDR ('ExeM'). */385 # define IEMEXECMEMALLOCHDR_MAGIC UINT32_C(0x4d657845)386 #endif387 388 389 static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);390 391 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING392 /**393 * Frees up executable memory when we're out space.394 *395 * This is an alternative to iemTbAllocatorFreeupNativeSpace() that frees up396 * space in a more linear fashion from the allocator's point of view. 
It may397 * also defragment if implemented & enabled398 */399 static void iemExecMemAllocatorPrune(PVMCPU pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)400 {401 # ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER402 # error "IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING requires IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER"403 # endif404 STAM_REL_PROFILE_START(&pExecMemAllocator->StatPruneProf, a);405 406 /*407 * Before we can start, we must process delayed frees.408 */409 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);410 411 AssertCompile(RT_IS_POWER_OF_TWO(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE));412 413 uint32_t const cbChunk = pExecMemAllocator->cbChunk;414 AssertReturnVoid(RT_IS_POWER_OF_TWO(cbChunk));415 AssertReturnVoid(cbChunk >= _1M && cbChunk <= _256M); /* see iemExecMemAllocatorInit */416 417 uint32_t const cChunks = pExecMemAllocator->cChunks;418 AssertReturnVoid(cChunks == pExecMemAllocator->cMaxChunks);419 AssertReturnVoid(cChunks >= 1);420 421 /*422 * Decide how much to prune. The chunk is is a multiple of two, so we'll be423 * scanning a multiple of two here as well.424 */425 uint32_t cbToPrune = cbChunk;426 427 /* Never more than 25%. */428 if (cChunks < 4)429 cbToPrune /= cChunks == 1 ? 4 : 2;430 431 /* Upper limit. In a debug build a 4MB limit averages out at ~0.6ms per call. */432 if (cbToPrune > _4M)433 cbToPrune = _4M;434 435 /*436 * Adjust the pruning chunk and offset accordingly.437 */438 uint32_t idxChunk = pExecMemAllocator->idxChunkPrune;439 uint32_t offChunk = pExecMemAllocator->offChunkPrune;440 offChunk &= ~(uint32_t)(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1U);441 if (offChunk >= cbChunk)442 {443 offChunk = 0;444 idxChunk += 1;445 }446 if (idxChunk >= cChunks)447 {448 offChunk = 0;449 idxChunk = 0;450 }451 452 uint32_t const offPruneEnd = RT_MIN(offChunk + cbToPrune, cbChunk);453 454 /*455 * Do the pruning. The current approach is the sever kind.456 */457 uint64_t cbPruned = 0;458 uint8_t * const pbChunk = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunk;459 while (offChunk < offPruneEnd)460 {461 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)&pbChunk[offChunk];462 463 /* Is this the start of an allocation block for TB? (We typically have464 one allocation at the start of each chunk for the unwind info where465 pTb is NULL.) 
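The pruning walk that follows relies on every sub-allocation starting with a small header (magic, chunk index, owning TB). A self-contained sketch of that scan; ALLOCHDR, UNIT_SIZE and PruneScan are inventions for the example, and the block size is stored directly in the header here instead of being derived from the TB's instruction count as the real code does:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define UNIT_SIZE   128u                    /* assumed sub-allocation granularity */
    #define HDR_MAGIC   UINT32_C(0x4d657845)    /* 'ExeM' */

    typedef struct ALLOCHDR
    {
        uint32_t uMagic;
        uint32_t idxChunk;
        void    *pvOwner;    /* stand-in for the owning translation block pointer */
        uint32_t cbBlock;    /* simplification: block size kept here, unit aligned */
    } ALLOCHDR;

    /* Walk part of a chunk in unit steps; report every live allocation header
       and skip its whole block, otherwise advance a single unit. */
    static void PruneScan(uint8_t *pbChunk, uint32_t offStart, uint32_t offEnd, uint32_t idxChunk)
    {
        uint32_t off = offStart;
        while (off < offEnd)
        {
            ALLOCHDR const *pHdr = (ALLOCHDR const *)&pbChunk[off];
            if (   pHdr->uMagic == HDR_MAGIC
                && pHdr->idxChunk == idxChunk
                && pHdr->pvOwner != NULL)
            {
                printf("TB block at offset %#x, %u bytes, owner %p -> free it\n",
                       off, pHdr->cbBlock, pHdr->pvOwner);
                off += pHdr->cbBlock;
            }
            else
                off += UNIT_SIZE;
        }
    }

    int main(void)
    {
        uint32_t const cbChunk = 8 * UNIT_SIZE;
        uint8_t *pbChunk = (uint8_t *)calloc(1, cbChunk);   /* zeroed, so no stray magics */
        if (!pbChunk)
            return 1;
        int       iDummyTb = 0;
        ALLOCHDR *pHdr     = (ALLOCHDR *)&pbChunk[2 * UNIT_SIZE];   /* fake allocation at unit 2 */
        pHdr->uMagic   = HDR_MAGIC;
        pHdr->idxChunk = 0;
        pHdr->pvOwner  = &iDummyTb;
        pHdr->cbBlock  = 3 * UNIT_SIZE;
        PruneScan(pbChunk, 0, cbChunk, 0);
        free(pbChunk);
        return 0;
    }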
*/466 if ( pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC467 && pHdr->pTb != NULL468 && pHdr->idxChunk == idxChunk)469 {470 PIEMTB const pTb = pHdr->pTb;471 AssertPtr(pTb);472 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);473 474 uint32_t const cbBlock = RT_ALIGN_32(pTb->Native.cInstructions * sizeof(IEMNATIVEINSTR) + sizeof(*pHdr),475 IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);476 AssertBreakStmt(offChunk + cbBlock <= cbChunk, offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); /* paranoia */477 478 iemTbAllocatorFree(pVCpu, pTb);479 480 cbPruned += cbBlock;481 offChunk += cbBlock;482 }483 else484 offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;485 }486 STAM_REL_PROFILE_ADD_PERIOD(&pExecMemAllocator->StatPruneRecovered, cbPruned);487 488 /*489 * Save the current pruning point.490 */491 pExecMemAllocator->offChunkPrune = offChunk;492 pExecMemAllocator->idxChunkPrune = idxChunk;493 494 STAM_REL_PROFILE_STOP(&pExecMemAllocator->StatPruneProf, a);495 }496 #endif /* IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING */497 498 499 /**500 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating501 * the heap statistics.502 */503 static void *iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,504 uint32_t cbReq, uint32_t idxChunk)505 {506 pExecMemAllocator->cAllocations += 1;507 pExecMemAllocator->cbAllocated += cbReq;508 pExecMemAllocator->cbFree -= cbReq;509 pExecMemAllocator->idxChunkHint = idxChunk;510 511 #ifdef RT_OS_DARWIN512 /*513 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive514 * on darwin. So, we mark the pages returned as read+write after alloc and515 * expect the caller to call iemExecMemAllocatorReadyForUse when done516 * writing to the allocation.517 *518 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon519 * for details.520 */521 /** @todo detect if this is necessary... it wasn't required on 10.15 or522 * whatever older version it was. */523 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);524 AssertRC(rc);525 #endif526 527 return pvRet;528 }529 530 531 static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,532 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk, PIEMTB pTb)533 {534 /*535 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.536 */537 Assert(!(cToScan & 63));538 Assert(!(idxFirst & 63));539 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);540 pbmAlloc += idxFirst / 64;541 542 /*543 * Scan the bitmap for cReqUnits of consequtive clear bits544 */545 /** @todo This can probably be done more efficiently for non-x86 systems. 
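The scan being set up here looks for cReqUnits consecutive clear bits in the chunk's allocation bitmap. A deliberately naive, standalone version of that first-fit search (bit by bit, instead of ASMBitFirstClear/ASMBitNextClear over 64-bit words and without the free hint); BitmapFindClearRun and BitmapSetRange are made-up names:

    #include <stdint.h>
    #include <stdio.h>

    /* First-fit search for cReqUnits consecutive clear bits among cUnits bits;
       returns the first unit index or -1.  Bit set = unit in use. */
    static int32_t BitmapFindClearRun(const uint64_t *pbmAlloc, uint32_t cUnits, uint32_t cReqUnits)
    {
        uint32_t cFound = 0;
        for (uint32_t iBit = 0; iBit < cUnits; iBit++)
        {
            if (pbmAlloc[iBit / 64] & (UINT64_C(1) << (iBit & 63)))
                cFound = 0;                          /* unit busy, restart the run */
            else if (++cFound >= cReqUnits)
                return (int32_t)(iBit - cReqUnits + 1);
        }
        return -1;
    }

    static void BitmapSetRange(uint64_t *pbmAlloc, uint32_t iFirst, uint32_t cBits)
    {
        for (uint32_t i = 0; i < cBits; i++)
            pbmAlloc[(iFirst + i) / 64] |= UINT64_C(1) << ((iFirst + i) & 63);
    }

    int main(void)
    {
        uint64_t abm[2] = { 0, 0 };                      /* 128 units of 128 bytes */
        BitmapSetRange(abm, 0, 3);                       /* pretend units 0..2 hold unwind info */
        uint32_t const cbReq     = 1000;                 /* bytes wanted, header included */
        uint32_t const cReqUnits = (cbReq + 127) / 128;  /* round up to 128-byte units */
        int32_t  const iFirst    = BitmapFindClearRun(abm, 128, cReqUnits);
        if (iFirst >= 0)
        {
            BitmapSetRange(abm, (uint32_t)iFirst, cReqUnits);
            printf("allocated %u units at unit %d (byte offset %#x)\n",
                   cReqUnits, iFirst, (unsigned)iFirst * 128);
        }
        return 0;
    }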
*/546 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);547 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)548 {549 uint32_t idxAddBit = 1;550 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))551 idxAddBit++;552 if (idxAddBit >= cReqUnits)553 {554 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);555 556 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];557 pChunk->cFreeUnits -= cReqUnits;558 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;559 560 # ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER561 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)((uint8_t *)pChunk->pvChunk562 + ( (idxFirst + (uint32_t)iBit)563 << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT));564 pHdr->uMagic = IEMEXECMEMALLOCHDR_MAGIC;565 pHdr->idxChunk = idxChunk;566 pHdr->pTb = pTb;567 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pHdr + 1,568 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);569 #else570 RT_NOREF(pTb);571 void * const pvRet = (uint8_t *)pChunk->pvChunk572 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);573 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,574 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);575 #endif576 }577 578 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);579 }580 return NULL;581 }582 583 584 static void *585 iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq, PIEMTB pTb)586 {587 /*588 * Figure out how much to allocate.589 */590 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER591 uint32_t const cReqUnits = (cbReq + sizeof(IEMEXECMEMALLOCHDR) + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)592 #else593 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)594 #endif595 >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;596 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)597 {598 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];599 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;600 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)601 {602 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,603 pExecMemAllocator->cUnitsPerChunk - idxHint,604 cReqUnits, idxChunk, pTb);605 if (pvRet)606 return pvRet;607 }608 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,609 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),610 cReqUnits, idxChunk, pTb);611 }612 return NULL;613 }614 615 616 /**617 * Allocates @a cbReq bytes of executable memory.618 *619 * @returns Pointer to the memory, NULL if out of memory or other problem620 * encountered.621 * @param pVCpu The cross context virtual CPU structure of the calling622 * thread.623 * @param cbReq How many bytes are required.624 * @param pTb The translation block that will be using the allocation.625 */626 static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb)627 {628 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;629 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);630 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);631 STAM_PROFILE_START(&pExecMemAllocator->StatAlloc, a);632 633 for (unsigned iIteration = 0;; iIteration++)634 {635 if (cbReq <= pExecMemAllocator->cbFree)636 {637 uint32_t const cChunks = 
pExecMemAllocator->cChunks;638 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;639 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)640 {641 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);642 if (pvRet)643 {644 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);645 return pvRet;646 }647 }648 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)649 {650 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);651 if (pvRet)652 {653 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);654 return pvRet;655 }656 }657 }658 659 /*660 * Can we grow it with another chunk?661 */662 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)663 {664 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);665 AssertLogRelRCReturn(rc, NULL);666 667 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;668 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);669 if (pvRet)670 {671 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);672 return pvRet;673 }674 AssertFailed();675 }676 677 /*678 * Try prune native TBs once.679 */680 if (iIteration == 0)681 {682 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING683 iemExecMemAllocatorPrune(pVCpu, pExecMemAllocator);684 #else685 /* No header included in the instruction count here. */686 uint32_t const cNeededInstrs = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) / sizeof(IEMNATIVEINSTR);687 iemTbAllocatorFreeupNativeSpace(pVCpu, cNeededInstrs);688 #endif689 }690 else691 {692 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeExecMemInstrBufAllocFailed);693 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);694 return NULL;695 }696 }697 }698 699 700 /** This is a hook that we may need later for changing memory protection back701 * to readonly+exec */702 static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)703 {704 #ifdef RT_OS_DARWIN705 /* See iemExecMemAllocatorAllocTailCode for the explanation. */706 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);707 AssertRC(rc); RT_NOREF(pVCpu);708 709 /*710 * Flush the instruction cache:711 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon712 */713 /* sys_dcache_flush(pv, cb); - not necessary */714 sys_icache_invalidate(pv, cb);715 #else716 RT_NOREF(pVCpu, pv, cb);717 #endif718 }719 720 721 /**722 * Frees executable memory.723 */724 void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)725 {726 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;727 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);728 AssertPtr(pv);729 #ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER730 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));731 732 /* Align the size as we did when allocating the block. */733 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);734 735 #else736 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)pv - 1;737 Assert(!((uintptr_t)pHdr & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));738 AssertReturnVoid(pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC);739 uint32_t const idxChunk = pHdr->idxChunk;740 AssertReturnVoid(idxChunk < pExecMemAllocator->cChunks);741 pv = pHdr;742 743 /* Adjust and align the size to cover the whole allocation area. 
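Freeing works by stepping one header back from the caller's pointer and validating it, as done just above. A tiny standalone illustration of that header-prefix pattern on top of malloc (HdrAlloc/HdrFree and the HDR layout are made up; the real allocator hands out unit-aligned blocks carved from a chunk and clears bitmap bits rather than calling free):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define UNIT_SIZE 128u
    #define HDR_MAGIC UINT32_C(0x4d657845)

    typedef struct HDR { uint32_t uMagic; uint32_t idxChunk; void *pvOwner; } HDR;

    /* Hand out memory with a hidden header in front of the payload. */
    static void *HdrAlloc(uint32_t cbReq, uint32_t idxChunk, void *pvOwner)
    {
        size_t const cb = ((size_t)cbReq + sizeof(HDR) + UNIT_SIZE - 1) & ~(size_t)(UNIT_SIZE - 1);
        HDR *pHdr = (HDR *)malloc(cb);
        if (!pHdr)
            return NULL;
        pHdr->uMagic   = HDR_MAGIC;
        pHdr->idxChunk = idxChunk;
        pHdr->pvOwner  = pvOwner;
        return pHdr + 1;                  /* the caller only ever sees the payload */
    }

    /* Freeing recovers the header by stepping one HDR back from the payload. */
    static void HdrFree(void *pv)
    {
        HDR *pHdr = (HDR *)pv - 1;
        if (pHdr->uMagic != HDR_MAGIC)
        {
            fprintf(stderr, "bad free %p\n", pv);
            return;
        }
        pHdr->uMagic = 0;                 /* poison so a double free is caught */
        free(pHdr);
    }

    int main(void)
    {
        void *pv = HdrAlloc(1000, 0, NULL);
        printf("payload at %p, header at %p\n", pv, (void *)((HDR *)pv - 1));
        HdrFree(pv);
        return 0;
    }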
*/744 cb = RT_ALIGN_Z(cb + sizeof(*pHdr), IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);745 #endif746 747 /* Free it / assert sanity. */748 bool fFound = false;749 uint32_t const cbChunk = pExecMemAllocator->cbChunk;750 #ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER751 uint32_t const cChunks = pExecMemAllocator->cChunks;752 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)753 #endif754 {755 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;756 fFound = offChunk < cbChunk;757 if (fFound)758 {759 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;760 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;761 762 /* Check that it's valid and free it. */763 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];764 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));765 for (uint32_t i = 1; i < cReqUnits; i++)766 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));767 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);768 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER769 pHdr->uMagic = 0;770 pHdr->idxChunk = 0;771 pHdr->pTb = NULL;772 #endif773 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;774 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;775 776 /* Update the stats. */777 pExecMemAllocator->cbAllocated -= cb;778 pExecMemAllocator->cbFree += cb;779 pExecMemAllocator->cAllocations -= 1;780 return;781 }782 }783 AssertFailed();784 }785 786 787 788 #ifdef IN_RING3789 # ifdef RT_OS_WINDOWS790 791 /**792 * Initializes the unwind info structures for windows hosts.793 */794 static int795 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,796 void *pvChunk, uint32_t idxChunk)797 {798 RT_NOREF(pVCpu);799 800 /*801 * The AMD64 unwind opcodes.802 *803 * This is a program that starts with RSP after a RET instruction that804 * ends up in recompiled code, and the operations we describe here will805 * restore all non-volatile registers and bring RSP back to where our806 * RET address is. This means it's reverse order from what happens in807 * the prologue.808 *809 * Note! Using a frame register approach here both because we have one810 * and but mainly because the UWOP_ALLOC_LARGE argument values811 * would be a pain to write initializers for. 
On the positive812 * side, we're impervious to changes in the the stack variable813 * area can can deal with dynamic stack allocations if necessary.814 */815 static const IMAGE_UNWIND_CODE s_aOpcodes[] =816 {817 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */818 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */819 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */820 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */821 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */822 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */823 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */824 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */825 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */826 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */827 };828 union829 {830 IMAGE_UNWIND_INFO Info;831 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];832 } s_UnwindInfo =833 {834 {835 /* .Version = */ 1,836 /* .Flags = */ 0,837 /* .SizeOfProlog = */ 16, /* whatever */838 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),839 /* .FrameRegister = */ X86_GREG_xBP,840 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,841 }842 };843 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);844 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);845 846 /*847 * Calc how much space we need and allocate it off the exec heap.848 */849 unsigned const cFunctionEntries = 1;850 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);851 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;852 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions853 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeeded, NULL);854 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);855 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;856 857 /*858 * Initialize the structures.859 */860 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];861 862 paFunctions[0].BeginAddress = 0;863 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;864 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);865 866 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));867 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));868 869 /*870 * Register it.871 */872 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);873 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. 
*/874 875 return VINF_SUCCESS;876 }877 878 879 # else /* !RT_OS_WINDOWS */880 881 /**882 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).883 */884 DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)885 {886 if (iValue >= 64)887 {888 Assert(iValue < 0x2000);889 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;890 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;891 }892 else if (iValue >= 0)893 *Ptr.pb++ = (uint8_t)iValue;894 else if (iValue > -64)895 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;896 else897 {898 Assert(iValue > -0x2000);899 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;900 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;901 }902 return Ptr;903 }904 905 906 /**907 * Emits an ULEB128 encoded value (up to 64-bit wide).908 */909 DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)910 {911 while (uValue >= 0x80)912 {913 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;914 uValue >>= 7;915 }916 *Ptr.pb++ = (uint8_t)uValue;917 return Ptr;918 }919 920 921 /**922 * Emits a CFA rule as register @a uReg + offset @a off.923 */924 DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)925 {926 *Ptr.pb++ = DW_CFA_def_cfa;927 Ptr = iemDwarfPutUleb128(Ptr, uReg);928 Ptr = iemDwarfPutUleb128(Ptr, off);929 return Ptr;930 }931 932 933 /**934 * Emits a register (@a uReg) save location:935 * CFA + @a off * data_alignment_factor936 */937 DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)938 {939 if (uReg < 0x40)940 *Ptr.pb++ = DW_CFA_offset | uReg;941 else942 {943 *Ptr.pb++ = DW_CFA_offset_extended;944 Ptr = iemDwarfPutUleb128(Ptr, uReg);945 }946 Ptr = iemDwarfPutUleb128(Ptr, off);947 return Ptr;948 }949 950 951 # if 0 /* unused */952 /**953 * Emits a register (@a uReg) save location, using signed offset:954 * CFA + @a offSigned * data_alignment_factor955 */956 DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)957 {958 *Ptr.pb++ = DW_CFA_offset_extended_sf;959 Ptr = iemDwarfPutUleb128(Ptr, uReg);960 Ptr = iemDwarfPutLeb128(Ptr, offSigned);961 return Ptr;962 }963 # endif964 965 966 /**967 * Initializes the unwind info section for non-windows hosts.968 */969 static int970 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,971 void *pvChunk, uint32_t idxChunk)972 {973 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];974 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */975 976 RTPTRUNION Ptr = { pEhFrame->abEhFrame };977 978 /*979 * Generate the CIE first.980 */981 # ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */982 uint8_t const iDwarfVer = 3;983 # else984 uint8_t const iDwarfVer = 4;985 # endif986 RTPTRUNION const PtrCie = Ptr;987 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */988 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */989 *Ptr.pb++ = iDwarfVer; /* DwARF version */990 *Ptr.pb++ = 0; /* Augmentation. */991 if (iDwarfVer >= 4)992 {993 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */994 *Ptr.pb++ = 0; /* Segment selector size. */995 }996 # ifdef RT_ARCH_AMD64997 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */998 # else999 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */1000 # endif1001 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). 
*/1002 # ifdef RT_ARCH_AMD641003 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */1004 # elif defined(RT_ARCH_ARM64)1005 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */1006 # else1007 # error "port me"1008 # endif1009 /* Initial instructions: */1010 # ifdef RT_ARCH_AMD641011 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */1012 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */1013 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */1014 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */1015 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */1016 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */1017 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */1018 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */1019 # elif defined(RT_ARCH_ARM64)1020 # if 11021 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */1022 # else1023 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);1024 # endif1025 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */1026 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */1027 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */1028 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */1029 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */1030 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */1031 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */1032 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */1033 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */1034 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */1035 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */1036 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */1037 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);1038 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */1039 # else1040 # error "port me"1041 # endif1042 while ((Ptr.u - PtrCie.u) & 3)1043 *Ptr.pb++ = DW_CFA_nop;1044 /* Finalize the CIE size. */1045 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);1046 1047 /*1048 * Generate an FDE for the whole chunk area.1049 */1050 # ifdef IEMNATIVE_USE_LIBUNWIND1051 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];1052 # endif1053 RTPTRUNION const PtrFde = Ptr;1054 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */1055 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */1056 Ptr.pu32++;1057 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */1058 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */1059 # if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */1060 *Ptr.pb++ = DW_CFA_nop;1061 # endif1062 while ((Ptr.u - PtrFde.u) & 3)1063 *Ptr.pb++ = DW_CFA_nop;1064 /* Finalize the FDE size. */1065 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);1066 1067 /* Terminator entry. 
*/1068 *Ptr.pu32++ = 0;1069 *Ptr.pu32++ = 0; /* just to be sure... */1070 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));1071 1072 /*1073 * Register it.1074 */1075 # ifdef IEMNATIVE_USE_LIBUNWIND1076 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);1077 # else1078 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */1079 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);1080 # endif1081 1082 # ifdef IEMNATIVE_USE_GDB_JIT1083 /*1084 * Now for telling GDB about this (experimental).1085 *1086 * This seems to work best with ET_DYN.1087 */1088 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk,1089 sizeof(GDBJITSYMFILE), NULL);1090 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);1091 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;1092 1093 RT_ZERO(*pSymFile);1094 1095 /*1096 * The ELF header:1097 */1098 pSymFile->EHdr.e_ident[0] = ELFMAG0;1099 pSymFile->EHdr.e_ident[1] = ELFMAG1;1100 pSymFile->EHdr.e_ident[2] = ELFMAG2;1101 pSymFile->EHdr.e_ident[3] = ELFMAG3;1102 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;1103 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;1104 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;1105 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;1106 # ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN1107 pSymFile->EHdr.e_type = ET_DYN;1108 # else1109 pSymFile->EHdr.e_type = ET_REL;1110 # endif1111 # ifdef RT_ARCH_AMD641112 pSymFile->EHdr.e_machine = EM_AMD64;1113 # elif defined(RT_ARCH_ARM64)1114 pSymFile->EHdr.e_machine = EM_AARCH64;1115 # else1116 # error "port me"1117 # endif1118 pSymFile->EHdr.e_version = 1; /*?*/1119 pSymFile->EHdr.e_entry = 0;1120 # if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)1121 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);1122 # else1123 pSymFile->EHdr.e_phoff = 0;1124 # endif1125 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);1126 pSymFile->EHdr.e_flags = 0;1127 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);1128 # if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)1129 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);1130 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);1131 # else1132 pSymFile->EHdr.e_phentsize = 0;1133 pSymFile->EHdr.e_phnum = 0;1134 # endif1135 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);1136 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);1137 pSymFile->EHdr.e_shstrndx = 0; /* set later */1138 1139 uint32_t offStrTab = 0;1140 #define APPEND_STR(a_szStr) do { \1141 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \1142 offStrTab += sizeof(a_szStr); \1143 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \1144 } while (0)1145 #define APPEND_STR_FMT(a_szStr, ...) 
do { \1146 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \1147 offStrTab++; \1148 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \1149 } while (0)1150 1151 /*1152 * Section headers.1153 */1154 /* Section header #0: NULL */1155 unsigned i = 0;1156 APPEND_STR("");1157 RT_ZERO(pSymFile->aShdrs[i]);1158 i++;1159 1160 /* Section header: .eh_frame */1161 pSymFile->aShdrs[i].sh_name = offStrTab;1162 APPEND_STR(".eh_frame");1163 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;1164 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;1165 # if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)1166 pSymFile->aShdrs[i].sh_offset1167 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);1168 # else1169 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];1170 pSymFile->aShdrs[i].sh_offset = 0;1171 # endif1172 1173 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);1174 pSymFile->aShdrs[i].sh_link = 0;1175 pSymFile->aShdrs[i].sh_info = 0;1176 pSymFile->aShdrs[i].sh_addralign = 1;1177 pSymFile->aShdrs[i].sh_entsize = 0;1178 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));1179 i++;1180 1181 /* Section header: .shstrtab */1182 unsigned const iShStrTab = i;1183 pSymFile->EHdr.e_shstrndx = iShStrTab;1184 pSymFile->aShdrs[i].sh_name = offStrTab;1185 APPEND_STR(".shstrtab");1186 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;1187 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;1188 # if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)1189 pSymFile->aShdrs[i].sh_offset1190 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);1191 # else1192 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];1193 pSymFile->aShdrs[i].sh_offset = 0;1194 # endif1195 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);1196 pSymFile->aShdrs[i].sh_link = 0;1197 pSymFile->aShdrs[i].sh_info = 0;1198 pSymFile->aShdrs[i].sh_addralign = 1;1199 pSymFile->aShdrs[i].sh_entsize = 0;1200 i++;1201 1202 /* Section header: .symbols */1203 pSymFile->aShdrs[i].sh_name = offStrTab;1204 APPEND_STR(".symtab");1205 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;1206 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;1207 pSymFile->aShdrs[i].sh_offset1208 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);1209 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);1210 pSymFile->aShdrs[i].sh_link = iShStrTab;1211 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);1212 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);1213 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);1214 i++;1215 1216 # if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)1217 /* Section header: .symbols */1218 pSymFile->aShdrs[i].sh_name = offStrTab;1219 APPEND_STR(".dynsym");1220 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;1221 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;1222 pSymFile->aShdrs[i].sh_offset1223 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);1224 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);1225 pSymFile->aShdrs[i].sh_link = iShStrTab;1226 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);1227 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);1228 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);1229 i++;1230 # endif1231 1232 # if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)1233 /* Section header: .dynamic */1234 
pSymFile->aShdrs[i].sh_name = offStrTab;1235 APPEND_STR(".dynamic");1236 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;1237 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;1238 pSymFile->aShdrs[i].sh_offset1239 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);1240 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);1241 pSymFile->aShdrs[i].sh_link = iShStrTab;1242 pSymFile->aShdrs[i].sh_info = 0;1243 pSymFile->aShdrs[i].sh_addralign = 1;1244 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);1245 i++;1246 # endif1247 1248 /* Section header: .text */1249 unsigned const iShText = i;1250 pSymFile->aShdrs[i].sh_name = offStrTab;1251 APPEND_STR(".text");1252 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;1253 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;1254 # if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)1255 pSymFile->aShdrs[i].sh_offset1256 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);1257 # else1258 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);1259 pSymFile->aShdrs[i].sh_offset = 0;1260 # endif1261 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);1262 pSymFile->aShdrs[i].sh_link = 0;1263 pSymFile->aShdrs[i].sh_info = 0;1264 pSymFile->aShdrs[i].sh_addralign = 1;1265 pSymFile->aShdrs[i].sh_entsize = 0;1266 i++;1267 1268 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));1269 1270 # if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)1271 /*1272 * The program headers:1273 */1274 /* Everything in a single LOAD segment: */1275 i = 0;1276 pSymFile->aPhdrs[i].p_type = PT_LOAD;1277 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;1278 pSymFile->aPhdrs[i].p_offset1279 = pSymFile->aPhdrs[i].p_vaddr1280 = pSymFile->aPhdrs[i].p_paddr = 0;1281 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */1282 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;1283 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;1284 i++;1285 /* The .dynamic segment. */1286 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;1287 pSymFile->aPhdrs[i].p_flags = PF_R;1288 pSymFile->aPhdrs[i].p_offset1289 = pSymFile->aPhdrs[i].p_vaddr1290 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);1291 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */1292 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);1293 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);1294 i++;1295 1296 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));1297 1298 /*1299 * The dynamic section:1300 */1301 i = 0;1302 pSymFile->aDyn[i].d_tag = DT_SONAME;1303 pSymFile->aDyn[i].d_un.d_val = offStrTab;1304 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);1305 i++;1306 pSymFile->aDyn[i].d_tag = DT_STRTAB;1307 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);1308 i++;1309 pSymFile->aDyn[i].d_tag = DT_STRSZ;1310 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);1311 i++;1312 pSymFile->aDyn[i].d_tag = DT_SYMTAB;1313 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);1314 i++;1315 pSymFile->aDyn[i].d_tag = DT_SYMENT;1316 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);1317 i++;1318 pSymFile->aDyn[i].d_tag = DT_NULL;1319 i++;1320 Assert(i == RT_ELEMENTS(pSymFile->aDyn));1321 # endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */1322 1323 /*1324 * Symbol tables:1325 */1326 /** @todo gdb doesn't seem to really like this ... 
*/1327 i = 0;1328 pSymFile->aSymbols[i].st_name = 0;1329 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;1330 pSymFile->aSymbols[i].st_value = 0;1331 pSymFile->aSymbols[i].st_size = 0;1332 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);1333 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;1334 # ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN1335 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];1336 # endif1337 i++;1338 1339 pSymFile->aSymbols[i].st_name = 0;1340 pSymFile->aSymbols[i].st_shndx = SHN_ABS;1341 pSymFile->aSymbols[i].st_value = 0;1342 pSymFile->aSymbols[i].st_size = 0;1343 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);1344 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;1345 i++;1346 1347 pSymFile->aSymbols[i].st_name = offStrTab;1348 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);1349 # if 01350 pSymFile->aSymbols[i].st_shndx = iShText;1351 pSymFile->aSymbols[i].st_value = 0;1352 # else1353 pSymFile->aSymbols[i].st_shndx = SHN_ABS;1354 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);1355 # endif1356 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;1357 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);1358 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;1359 # ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN1360 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];1361 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);1362 # endif1363 i++;1364 1365 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));1366 Assert(offStrTab < sizeof(pSymFile->szzStrTab));1367 1368 /*1369 * The GDB JIT entry and informing GDB.1370 */1371 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;1372 # if 11373 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);1374 # else1375 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);1376 # endif1377 1378 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);1379 RTCritSectEnter(&g_IemNativeGdbJitLock);1380 pEhFrame->GdbJitEntry.pNext = NULL;1381 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;1382 if (__jit_debug_descriptor.pTail)1383 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;1384 else1385 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;1386 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;1387 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;1388 1389 /* Notify GDB: */1390 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;1391 __jit_debug_register_code();1392 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;1393 RTCritSectLeave(&g_IemNativeGdbJitLock);1394 1395 # else /* !IEMNATIVE_USE_GDB_JIT */1396 RT_NOREF(pVCpu);1397 # endif /* !IEMNATIVE_USE_GDB_JIT */1398 1399 return VINF_SUCCESS;1400 }1401 1402 # endif /* !RT_OS_WINDOWS */1403 #endif /* IN_RING3 */1404 1405 1406 /**1407 * Adds another chunk to the executable memory allocator.1408 *1409 * This is used by the init code for the initial allocation and later by the1410 * regular allocator function when it's out of memory.1411 */1412 static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)1413 {1414 /* Check that we've room for growth. */1415 uint32_t const idxChunk = pExecMemAllocator->cChunks;1416 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);1417 1418 /* Allocate a chunk. 
*/1419 #ifdef RT_OS_DARWIN1420 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);1421 #else1422 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);1423 #endif1424 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);1425 1426 /*1427 * Add the chunk.1428 *1429 * This must be done before the unwind init so windows can allocate1430 * memory from the chunk when using the alternative sub-allocator.1431 */1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;1433 #ifdef IN_RING31434 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;1435 #endif1436 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;1437 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;1438 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],1439 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);1440 1441 pExecMemAllocator->cChunks = idxChunk + 1;1442 pExecMemAllocator->idxChunkHint = idxChunk;1443 1444 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;1445 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;1446 1447 #ifdef IN_RING31448 /*1449 * Initialize the unwind information (this cannot really fail atm).1450 * (This sets pvUnwindInfo.)1451 */1452 int rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);1453 if (RT_SUCCESS(rc))1454 { /* likely */ }1455 else1456 {1457 /* Just in case the impossible happens, undo the above up: */1458 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;1459 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;1460 pExecMemAllocator->cChunks = idxChunk;1461 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],1462 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);1463 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;1464 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;1465 1466 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);1467 return rc;1468 }1469 #endif1470 return VINF_SUCCESS;1471 }1472 1473 1474 /**1475 * Initializes the executable memory allocator for native recompilation on the1476 * calling EMT.1477 *1478 * @returns VBox status code.1479 * @param pVCpu The cross context virtual CPU structure of the calling1480 * thread.1481 * @param cbMax The max size of the allocator.1482 * @param cbInitial The initial allocator size.1483 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax1484 * dependent).1485 */1486 int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)1487 {1488 /*1489 * Validate input.1490 */1491 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);1492 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);1493 AssertLogRelMsgReturn( cbChunk != UINT32_MAX1494 || cbChunk == 01495 || ( RT_IS_POWER_OF_TWO(cbChunk)1496 && cbChunk >= _1M1497 && cbChunk <= _256M1498 && cbChunk <= cbMax),1499 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),1500 VERR_OUT_OF_RANGE);1501 1502 /*1503 * Adjust/figure out the chunk size.1504 */1505 if (cbChunk == 0 || cbChunk == UINT32_MAX)1506 {1507 if (cbMax >= _256M)1508 cbChunk = _64M;1509 else1510 {1511 if (cbMax < _16M)1512 cbChunk = cbMax >= _4M ? 
_4M : (uint32_t)cbMax;1513 else1514 cbChunk = (uint32_t)cbMax / 4;1515 if (!RT_IS_POWER_OF_TWO(cbChunk))1516 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));1517 }1518 }1519 1520 if (cbChunk > cbMax)1521 cbMax = cbChunk;1522 else1523 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;1524 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);1525 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);1526 1527 /*1528 * Allocate and initialize the allocatore instance.1529 */1530 size_t const offBitmaps = RT_ALIGN_Z(RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]), RT_CACHELINE_SIZE);1531 size_t const cbBitmaps = (size_t)(cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3)) * cMaxChunks;1532 size_t cbNeeded = offBitmaps + cbBitmaps;1533 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);1534 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));1535 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)1536 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);1537 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;1538 #endif1539 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);1540 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),1541 VERR_NO_MEMORY);1542 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;1543 pExecMemAllocator->cbChunk = cbChunk;1544 pExecMemAllocator->cMaxChunks = cMaxChunks;1545 pExecMemAllocator->cChunks = 0;1546 pExecMemAllocator->idxChunkHint = 0;1547 pExecMemAllocator->cAllocations = 0;1548 pExecMemAllocator->cbTotal = 0;1549 pExecMemAllocator->cbFree = 0;1550 pExecMemAllocator->cbAllocated = 0;1551 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);1552 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;1553 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);1554 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmaps); /* Mark everything as allocated. Clear when chunks are added. 
*/1555 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)1556 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);1557 #endif1558 for (uint32_t i = 0; i < cMaxChunks; i++)1559 {1560 pExecMemAllocator->aChunks[i].cFreeUnits = 0;1561 pExecMemAllocator->aChunks[i].idxFreeHint = 0;1562 pExecMemAllocator->aChunks[i].pvChunk = NULL;1563 #ifdef IN_RING01564 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;1565 #else1566 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;1567 #endif1568 }1569 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;1570 1571 /*1572 * Do the initial allocations.1573 */1574 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)1575 {1576 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);1577 AssertLogRelRCReturn(rc, rc);1578 }1579 1580 pExecMemAllocator->idxChunkHint = 0;1581 1582 /*1583 * Register statistics.1584 */1585 PUVM const pUVM = pVCpu->pUVCpu->pUVM;1586 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cAllocations, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,1587 "Current number of allocations", "/IEM/CPU%u/re/ExecMem/cAllocations", pVCpu->idCpu);1588 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,1589 "Currently allocated chunks", "/IEM/CPU%u/re/ExecMem/cChunks", pVCpu->idCpu);1590 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cMaxChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,1591 "Maximum number of chunks", "/IEM/CPU%u/re/ExecMem/cMaxChunks", pVCpu->idCpu);1592 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbChunk, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,1593 "Allocation chunk size", "/IEM/CPU%u/re/ExecMem/cbChunk", pVCpu->idCpu);1594 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,1595 "Number of bytes currently allocated", "/IEM/CPU%u/re/ExecMem/cbAllocated", pVCpu->idCpu);1596 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbFree, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,1597 "Number of bytes currently free", "/IEM/CPU%u/re/ExecMem/cbFree", pVCpu->idCpu);1598 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbTotal, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,1599 "Total number of bytes", "/IEM/CPU%u/re/ExecMem/cbTotal", pVCpu->idCpu);1600 #ifdef VBOX_WITH_STATISTICS1601 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatAlloc, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,1602 "Profiling the allocator", "/IEM/CPU%u/re/ExecMem/ProfAlloc", pVCpu->idCpu);1603 #endif1604 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING1605 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneProf, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,1606 "Pruning executable memory (alt)", "/IEM/CPU%u/re/ExecMem/Pruning", pVCpu->idCpu);1607 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneRecovered, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES_PER_CALL,1608 "Bytes recovered while pruning", "/IEM/CPU%u/re/ExecMem/PruningRecovered", pVCpu->idCpu);1609 #endif1610 1611 return VINF_SUCCESS;1612 }1613 115 1614 116 -
trunk/src/VBox/VMM/include/IEMInternal.h
r104114 r104115 6139 6139 DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT; 6140 6140 DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT; 6141 int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk); 6142 void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb); 6141 int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk) RT_NOEXCEPT; 6142 DECLHIDDEN(void *) iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb) RT_NOEXCEPT; 6143 DECLHIDDEN(void) iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) RT_NOEXCEPT; 6144 void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) RT_NOEXCEPT; 6143 6145 DECLASM(DECL_NO_RETURN(void)) iemNativeTbLongJmp(void *pvFramePointer, int rc) RT_NOEXCEPT; 6144 6146
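
The sub-allocator that iemExecMemAllocatorFree above participates in tracks each chunk with an allocation bitmap, one bit per IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE unit: freeing turns the pointer into a unit index inside the owning chunk, asserts that the bits are currently set, clears them, and then updates the free counters and the free hint. The following is a minimal single-chunk sketch of that bitmap scheme in plain C; the names, the 64-byte unit size and the simple first-fit scan are assumptions for illustration, not the VBox implementation.
{{{
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define UNIT_SHIFT  6                        /* one bit per 64-byte unit (assumption) */
#define UNIT_SIZE   (1u << UNIT_SHIFT)
#define CHUNK_UNITS 1024                     /* 64 KiB demo chunk */

static uint8_t  g_abChunk[CHUNK_UNITS * UNIT_SIZE];
static uint64_t g_bmAlloc[CHUNK_UNITS / 64]; /* 1 = allocated */

static int  bmTest(uint32_t i)  { return (int)((g_bmAlloc[i / 64] >> (i % 64)) & 1); }
static void bmSet(uint32_t i)   { g_bmAlloc[i / 64] |=  (uint64_t)1 << (i % 64); }
static void bmClear(uint32_t i) { g_bmAlloc[i / 64] &= ~((uint64_t)1 << (i % 64)); }

/* First-fit allocation of cb bytes rounded up to whole units. */
static void *demoAlloc(size_t cb)
{
    uint32_t const cUnits = (uint32_t)((cb + UNIT_SIZE - 1) >> UNIT_SHIFT);
    for (uint32_t iFirst = 0; iFirst + cUnits <= CHUNK_UNITS; iFirst++)
    {
        uint32_t i = 0;
        while (i < cUnits && !bmTest(iFirst + i))
            i++;
        if (i == cUnits)
        {
            for (i = 0; i < cUnits; i++)
                bmSet(iFirst + i);
            return &g_abChunk[(size_t)iFirst << UNIT_SHIFT];
        }
        iFirst += i;                         /* skip past the allocated unit we hit */
    }
    return NULL;
}

/* Freeing mirrors iemExecMemAllocatorFree: offset -> bit index, verify, clear. */
static void demoFree(void *pv, size_t cb)
{
    uintptr_t const off    = (uintptr_t)pv - (uintptr_t)&g_abChunk[0];
    uint32_t const  iFirst = (uint32_t)(off >> UNIT_SHIFT);
    uint32_t const  cUnits = (uint32_t)((cb + UNIT_SIZE - 1) >> UNIT_SHIFT);
    assert(off < sizeof(g_abChunk) && (off & (UNIT_SIZE - 1)) == 0);
    for (uint32_t i = 0; i < cUnits; i++)
    {
        assert(bmTest(iFirst + i));          /* must currently be allocated */
        bmClear(iFirst + i);
    }
}

int main(void)
{
    void *p1 = demoAlloc(200);               /* rounds up to 4 units */
    void *p2 = demoAlloc(64);
    demoFree(p1, 200);
    void *p3 = demoAlloc(100);               /* reuses the freed range */
    printf("p1=%p p2=%p p3=%p\n", p1, p2, p3);
    demoFree(p2, 64);
    demoFree(p3, 100);
    return 0;
}
}}}
The real allocator additionally keeps a per-chunk idxFreeHint and, when IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER is defined, a small header in front of each block carrying the chunk index and translation-block pointer, so the owning chunk does not have to be found by scanning pointer ranges.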
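
On Windows hosts, iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk above covers each chunk with a single IMAGE_RUNTIME_FUNCTION_ENTRY plus hand-written unwind opcodes and registers them with RtlAddFunctionTable. The sketch below shows just that registration plumbing with the documented x64 UNWIND_INFO layout (the SDK headers do not declare it, so the struct here is an assumption from the exception-handling docs); the zero opcode count describes a prolog-less function and is only a placeholder, not the frame layout the recompiler actually uses.
{{{
#include <windows.h>
#include <stdint.h>

/* Documented x64 UNWIND_INFO layout; not in the public SDK headers. */
typedef struct DEMOUNWINDINFO
{
    BYTE Version : 3;
    BYTE Flags   : 5;
    BYTE SizeOfProlog;
    BYTE CountOfCodes;
    BYTE FrameRegister : 4;
    BYTE FrameOffset   : 4;
    /* USHORT UnwindCode[]; would follow here */
} DEMOUNWINDINFO;

typedef struct DEMOCHUNKUNWIND
{
    RUNTIME_FUNCTION aFunctions[1];
    DEMOUNWINDINFO   UnwindInfo;
} DEMOCHUNKUNWIND;

/* Register unwind data covering [pvChunk, pvChunk + cbChunk); the table lives
   inside the chunk itself, just like the allocator above arranges it. */
static BOOLEAN demoRegisterChunkUnwindInfo(DEMOCHUNKUNWIND *pUnwind, void *pvChunk, uint32_t cbChunk)
{
    pUnwind->UnwindInfo.Version       = 1;
    pUnwind->UnwindInfo.Flags         = 0;
    pUnwind->UnwindInfo.SizeOfProlog  = 0;
    pUnwind->UnwindInfo.CountOfCodes  = 0;   /* no prolog described; placeholder only */
    pUnwind->UnwindInfo.FrameRegister = 0;
    pUnwind->UnwindInfo.FrameOffset   = 0;

    pUnwind->aFunctions[0].BeginAddress      = 0;
    pUnwind->aFunctions[0].EndAddress        = cbChunk;
    pUnwind->aFunctions[0].UnwindInfoAddress = (DWORD)((uintptr_t)&pUnwind->UnwindInfo - (uintptr_t)pvChunk);

    /* All addresses in the entries are RVAs relative to the base address given here. */
    return RtlAddFunctionTable(pUnwind->aFunctions, 1, (DWORD64)(uintptr_t)pvChunk);
}

int main(void)
{
    uint32_t const cbChunk = 64 * 1024;
    uint8_t *pbChunk = (uint8_t *)VirtualAlloc(NULL, cbChunk, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
    if (!pbChunk)
        return 1;
    BOOLEAN fOk = demoRegisterChunkUnwindInfo((DEMOCHUNKUNWIND *)pbChunk, pbChunk, cbChunk);
    if (fOk)
        RtlDeleteFunctionTable(((DEMOCHUNKUNWIND *)pbChunk)->aFunctions);
    VirtualFree(pbChunk, 0, MEM_RELEASE);
    return fOk ? 0 : 1;
}
}}}
Because the function entry and unwind info live inside the registered chunk, there is nothing extra to clean up when registration fails, which is the point the assertion in the code above makes.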
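
On non-Windows hosts the chunk's unwind data is a hand-assembled .eh_frame, built from the iemDwarfPutLeb128/iemDwarfPutUleb128 primitives and the DW_CFA_def_cfa / DW_CFA_offset rules shown above. This stand-alone sketch emits the same encodings using the standard DWARF opcode values and the SysV AMD64 register numbering; the helper names and the tiny output buffer are illustrative only.
{{{
#include <stdint.h>
#include <stdio.h>

/* Standard DWARF call-frame instruction opcodes used by the CIE above. */
#define DW_CFA_nop              0x00
#define DW_CFA_offset_extended  0x05
#define DW_CFA_def_cfa          0x0c
#define DW_CFA_offset           0x80    /* primary opcode in the top two bits, register in the low six */

/* ULEB128: 7 bits per byte, high bit set on all but the last byte. */
static uint8_t *putUleb128(uint8_t *pb, uint64_t uValue)
{
    while (uValue >= 0x80)
    {
        *pb++ = (uint8_t)(uValue & 0x7f) | 0x80;
        uValue >>= 7;
    }
    *pb++ = (uint8_t)uValue;
    return pb;
}

/* DW_CFA_def_cfa: CFA = register + offset (e.g. RBP + 16 in the CIE above). */
static uint8_t *putCfaDefCfa(uint8_t *pb, uint32_t uReg, uint32_t off)
{
    *pb++ = DW_CFA_def_cfa;
    pb = putUleb128(pb, uReg);
    return putUleb128(pb, off);
}

/* DW_CFA_offset: register saved at CFA + off * data_alignment_factor. */
static uint8_t *putCfaOffset(uint8_t *pb, uint32_t uReg, uint32_t off)
{
    if (uReg < 0x40)
        *pb++ = DW_CFA_offset | (uint8_t)uReg;
    else
    {
        *pb++ = DW_CFA_offset_extended;
        pb = putUleb128(pb, uReg);
    }
    return putUleb128(pb, off);
}

int main(void)
{
    uint8_t  ab[32];
    uint8_t *pb = ab;
    pb = putCfaDefCfa(pb, 6 /* DWARF rbp on AMD64 */, 16);      /* CFA = RBP + 16 */
    pb = putCfaOffset(pb, 16 /* DWARF return address */, 1);    /* RA saved at CFA - 8 */
    while ((pb - ab) & 3)
        *pb++ = DW_CFA_nop;                                     /* pad to a 4 byte boundary like the CIE code */
    for (uint8_t const *p = ab; p < pb; p++)
        printf("%02x ", *p);
    printf("\n");
    return 0;
}
}}}
With a data alignment factor of -8, the ULEB128 offset operand of DW_CFA_offset counts 8-byte slots below the CFA, which is why the initial instructions above store 1 for the return address, 2 for RBP and so on. The real helpers also include a signed LEB128 variant for the alignment factors; it only differs in how the sign bits of the final byte are handled.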
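
The IEMNATIVE_USE_GDB_JIT path builds a small in-memory ELF image (GDBJITSYMFILE) and announces it through GDB's JIT interface by linking an entry into __jit_debug_descriptor and calling __jit_debug_register_code. Below is that interface in the plain form documented in the GDB manual, plus a registration helper; the structures used above are the project's own declarations of the same thing, and constructing the symbol file itself is omitted here.
{{{
#include <stddef.h>
#include <stdint.h>

/* The GDB JIT interface as documented in the GDB manual ("JIT Compilation Interface"). */
typedef enum { JIT_NOACTION = 0, JIT_REGISTER_FN, JIT_UNREGISTER_FN } jit_actions_t;

struct jit_code_entry
{
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const char            *symfile_addr;    /* the in-memory object (an ELF image here) */
    uint64_t               symfile_size;
};

struct jit_descriptor
{
    uint32_t               version;         /* must be 1 */
    uint32_t               action_flag;     /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB plants a breakpoint on this symbol; the empty asm keeps it from being optimized away. */
void __attribute__((noinline)) __jit_debug_register_code(void) { __asm__ volatile(""); }

struct jit_descriptor __jit_debug_descriptor = { 1, JIT_NOACTION, NULL, NULL };

/* Link a new entry into the list and poke the debugger. */
static void jitRegisterSymFile(struct jit_code_entry *pEntry, const void *pvSymFile, uint64_t cbSymFile)
{
    pEntry->symfile_addr = (const char *)pvSymFile;
    pEntry->symfile_size = cbSymFile;
    pEntry->prev_entry   = NULL;
    pEntry->next_entry   = __jit_debug_descriptor.first_entry;
    if (pEntry->next_entry)
        pEntry->next_entry->prev_entry = pEntry;
    __jit_debug_descriptor.first_entry    = pEntry;
    __jit_debug_descriptor.relevant_entry = pEntry;
    __jit_debug_descriptor.action_flag    = JIT_REGISTER_FN;
    __jit_debug_register_code();
    __jit_debug_descriptor.action_flag    = JIT_NOACTION;
}

int main(void)
{
    static struct jit_code_entry s_Entry;
    static const char s_achDummy[16];        /* stand-in for a real in-memory ELF image */
    jitRegisterSymFile(&s_Entry, s_achDummy, sizeof(s_achDummy));
    return 0;
}
}}}
The code above serializes registration with a critical section and appends at the tail of the list instead of the head; either works, since GDB reads relevant_entry when the registration breakpoint is hit and walks the list from first_entry when attaching.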
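
iemExecMemAllocatorGrow above obtains each chunk from RTMemPageAllocEx, requesting executable pages everywhere except on Darwin, where executable mappings need separate handling. For readers without the IPRT APIs at hand, here is a rough POSIX approximation of carving out a chunk, emitting code into it and then making it executable; mmap/mprotect and GCC's __builtin___clear_cache are assumptions of this sketch (on macOS with the hardened runtime, MAP_JIT and the JIT write-protection toggle come into play), and a production JIT would keep the mapping writable for further sub-allocations the way the chunked allocator does.
{{{
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

/* Allocate a writable chunk for the JIT to emit into. */
static void *chunkAlloc(size_t cbChunk)
{
    void *pv = mmap(NULL, cbChunk, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    return pv != MAP_FAILED ? pv : NULL;
}

/* Flip the emitted code to read+execute and sync the instruction cache. */
static int chunkReadyForUse(void *pv, size_t cb)
{
    if (mprotect(pv, cb, PROT_READ | PROT_EXEC) != 0)
        return -1;
#if defined(__aarch64__) || defined(__arm__)
    __builtin___clear_cache((char *)pv, (char *)pv + cb);   /* I-cache sync on ARM */
#endif
    return 0;
}

int main(void)
{
    size_t const cbChunk = 64 * 1024;
    uint8_t *pbChunk = (uint8_t *)chunkAlloc(cbChunk);
    if (!pbChunk)
        return 1;
#if defined(__x86_64__)
    static const uint8_t s_abRet42[] = { 0xb8, 0x2a, 0x00, 0x00, 0x00, 0xc3 };  /* mov eax,42; ret */
    memcpy(pbChunk, s_abRet42, sizeof(s_abRet42));
    if (chunkReadyForUse(pbChunk, cbChunk) == 0)
        printf("jitted function returned %d\n", ((int (*)(void))(uintptr_t)pbChunk)());
#else
    (void)chunkReadyForUse(pbChunk, cbChunk);
#endif
    munmap(pbChunk, cbChunk);
    return 0;
}
}}}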
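
iemExecMemAllocatorInit picks the chunk size when the caller passes 0 or UINT32_MAX: 64 MiB once cbMax reaches 256 MiB, otherwise a quarter of cbMax (capped at 4 MiB when cbMax is below 16 MiB) rounded to a power of two via ASMBitLastSetU32, after which cbMax is rounded up to a whole number of chunks. A small stand-alone rendition of that arithmetic, using plain C helpers instead of the IPRT macros (names here are illustrative only):
{{{
#include <stdint.h>
#include <stdio.h>

#define _1M   (1u << 20)
#define _4M   (4u * _1M)
#define _16M  (16u * _1M)
#define _64M  (64u * _1M)
#define _256M (256u * _1M)

/* Round up to a power of two; mirrors the RT_BIT_32(ASMBitLastSetU32(x)) step. */
static uint32_t roundUpPow2(uint32_t u)
{
    if ((u & (u - 1)) == 0)
        return u;                    /* already a power of two */
    while (u & (u - 1))
        u &= u - 1;                  /* isolate the most significant bit... */
    return u << 1;                   /* ...then double it */
}

static void pickChunkSize(uint64_t cbMax, uint32_t *pcbChunk, uint32_t *pcMaxChunks)
{
    uint32_t cbChunk;
    if (cbMax >= _256M)
        cbChunk = _64M;
    else
    {
        if (cbMax < _16M)
            cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
        else
            cbChunk = (uint32_t)(cbMax / 4);
        cbChunk = roundUpPow2(cbChunk);
    }

    /* Round cbMax up to a whole number of chunks, as the init code does. */
    uint64_t const cbMaxAligned = cbChunk > cbMax ? cbChunk : (cbMax + cbChunk - 1) / cbChunk * cbChunk;
    *pcbChunk    = cbChunk;
    *pcMaxChunks = (uint32_t)(cbMaxAligned / cbChunk);
}

int main(void)
{
    uint32_t cbChunk, cMaxChunks;
    pickChunkSize(512u * _1M, &cbChunk, &cMaxChunks);   /* -> 64 MiB chunks, 8 of them */
    printf("cbChunk=%u MiB cMaxChunks=%u\n", (unsigned)(cbChunk / _1M), (unsigned)cMaxChunks);
    pickChunkSize(40u * _1M, &cbChunk, &cMaxChunks);    /* -> 16 MiB chunks, 3 of them */
    printf("cbChunk=%u MiB cMaxChunks=%u\n", (unsigned)(cbChunk / _1M), (unsigned)cMaxChunks);
    return 0;
}
}}}
The per-chunk bookkeeping then follows directly from these figures: cUnitsPerChunk is cbChunk shifted down by the allocation-unit shift, and the bitmap needs one 64-bit word per 64 such units, which is what the cBitmapElementsPerChunk computation above expresses.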