VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@105250

Last change on this file since 105250 was 105036, checked in by vboxsync, 7 months ago

VMM/IEM: Split the TLB into non-global (even) and global (odd) entries, doubling it in size. In native code the global entries are only checked for ring-0 TBs, as checking both entries is slower than just the even one. bugref:10687

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 430.8 KB
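The change log above describes splitting the TLB so that each lookup has an even slot for non-global pages and an odd slot for global pages, with native code probing the odd (global) slot only for ring-0 TBs. As a rough illustration of that indexing idea, here is a minimal standalone sketch (hypothetical names and hashing, not the actual IEM TLB layout or lookup code):

#include <stdint.h>

static inline uint32_t tlbSketchSlot(uint64_t uTag, bool fGlobalPage, uint32_t cTlbEntries)
{
    /* cTlbEntries is assumed to be a power of two; each tag hashes to a pair of
       adjacent slots: even = non-global entry, odd = global entry. */
    uint32_t const idxPair = (uint32_t)uTag & (cTlbEntries / 2 - 1);
    return idxPair * 2 + (fGlobalPage ? 1 : 0);
}

Per the change log, checking both slots is slower than checking just the even one, which is why ring-3 native TBs only look at the non-global entry.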
1/* $Id: IEMAllN8veRecompiler.cpp 105036 2024-06-26 22:33:48Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include <VBox/vmm/tm.h>
55#include "IEMInternal.h"
56#include <VBox/vmm/vmcc.h>
57#include <VBox/log.h>
58#include <VBox/err.h>
59#include <VBox/dis.h>
60#include <VBox/param.h>
61#include <iprt/assert.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
71# include "/opt/local/include/capstone/capstone.h"
72#endif
73
74#include "IEMInline.h"
75#include "IEMThreadedFunctions.h"
76#include "IEMN8veRecompiler.h"
77#include "IEMN8veRecompilerEmit.h"
78#include "IEMN8veRecompilerTlbLookup.h"
79#include "IEMNativeFunctions.h"
80
81
82/*
83 * Narrow down configs here to avoid wasting time on unused configs.
84 * Note! Same checks in IEMAllThrdRecompiler.cpp.
85 */
86
87#ifndef IEM_WITH_CODE_TLB
88# error The code TLB must be enabled for the recompiler.
89#endif
90
91#ifndef IEM_WITH_DATA_TLB
92# error The data TLB must be enabled for the recompiler.
93#endif
94
95#ifndef IEM_WITH_SETJMP
96# error The setjmp approach must be enabled for the recompiler.
97#endif
98
99/** @todo eliminate this clang build hack. */
100#if RT_CLANG_PREREQ(4, 0)
101# pragma GCC diagnostic ignored "-Wunused-function"
102#endif
103
104
105/*********************************************************************************************************************************
106* Internal Functions *
107*********************************************************************************************************************************/
108#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
109static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
110#endif
111DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
112DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
113 IEMNATIVEGSTREG enmGstReg, uint32_t off);
114DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
115
116
117
118/*********************************************************************************************************************************
119* Native Recompilation *
120*********************************************************************************************************************************/
121
122
123/**
124 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
125 */
126IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
127{
128 pVCpu->iem.s.cInstructions += idxInstr;
129 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
130}
131
132
133/**
134 * Helper for iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb.
135 */
136DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
137{
138 uint64_t fCpu = pVCpu->fLocalForcedActions;
139 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
140 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
141 | VMCPU_FF_TLB_FLUSH
142 | VMCPU_FF_UNHALT );
143 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
144 if (RT_LIKELY( ( !fCpu
145 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
146 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
147 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
148 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
149 return false;
150 return true;
151}
152
153
154/**
155 * Used by TB code to look up the next TB in the TB lookup table and switch to it directly (GCPhysPc already known).
156 */
157template <bool const a_fWithIrqCheck>
158IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
159 uint32_t fFlags, RTGCPHYS GCPhysPc))
160{
161 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
162 Assert(idxTbLookup < pTb->cTbLookupEntries);
163 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
164#if 1
165 PIEMTB const pNewTb = *ppNewTb;
166 if (pNewTb)
167 {
168# ifdef VBOX_STRICT
169 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
170 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
171 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
172 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
173 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
174 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
175# endif
176 if (pNewTb->GCPhysPc == GCPhysPc)
177 {
178# ifdef VBOX_STRICT
179 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
180 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
181 fAssertFlags |= IEMTB_F_INHIBIT_SHADOW;
182 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
183 fAssertFlags |= IEMTB_F_INHIBIT_NMI;
184# if 1 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. */
185 Assert(IEM_F_MODE_X86_IS_FLAT(fFlags));
186# else
187 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
188 {
189 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
190 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
191 fAssertFlags |= IEMTB_F_CS_LIM_CHECKS;
192 }
193# endif
194 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
195 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
196 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
197#endif
198
199 /*
200 * Check the flags + type.
201 */
202 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
203 {
204 /*
205 * Check for interrupts and stuff.
206 */
207 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
208 * The main problems are the statistics and, to some degree, the logging. :/ */
209 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
210 {
211 /* Do polling. */
212 uint64_t const cTbExecNative = pVCpu->iem.s.cTbExecNative;
213 if ( RT_LIKELY(cTbExecNative & 511)
214 || !TMTimerPollBoolWith32BitMilliTS(pVCpu->CTX_SUFF(pVM), pVCpu, &pVCpu->iem.s.msRecompilerPollNow) )
215 {
216 /*
217 * Success. Update statistics and switch to the next TB.
218 */
219 pVCpu->iem.s.cTbExecNative = cTbExecNative + 1;
220 if (a_fWithIrqCheck)
221 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
222 else
223 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
224
225 pNewTb->cUsed += 1;
226 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
227 pVCpu->iem.s.pCurTbR3 = pNewTb;
228 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
229 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
230 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
231 return (uintptr_t)pNewTb->Native.paInstructions;
232 }
233 }
234 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
235 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
236 }
237 else
238 {
239 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
240 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
241 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
242 }
243 }
244 else
245 {
246 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
247 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
248 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
249 }
250 }
251 else
252 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
253#else
254 NOREF(GCPhysPc);
255#endif
256
257 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
258 return 0;
259}
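/*
 * Rough sketch of the caller-side contract (simplified, not the exact emitted
 * code): the generated TB epilogue passes the lookup-table index plus the
 * already-known fFlags/GCPhysPc to the helper above.  A non-zero return value
 * is the address of the next TB's native instructions to jump to; zero means
 * no suitable TB was found, the lookup entry pointer has been recorded in
 * ppTbLookupEntryR3, and the TB exits through the normal break path instead.
 */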
260
261
262/**
263 * Used by TB code to look up the next TB in the TB lookup table and switch to it directly, doing the code TLB lookup for the PC itself.
264 */
265template <bool const a_fWithIrqCheck>
266IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
267{
268 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
269 Assert(idxTbLookup < pTb->cTbLookupEntries);
270 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
271#if 1
272 PIEMTB const pNewTb = *ppNewTb;
273 if (pNewTb)
274 {
275 /*
276 * Calculate the flags for the next TB and check if they match.
277 */
278 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
279 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
280 { /* likely */ }
281 else
282 {
283 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
284 fFlags |= IEMTB_F_INHIBIT_SHADOW;
285 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
286 fFlags |= IEMTB_F_INHIBIT_NMI;
287 }
288 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
289 {
290 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
291 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
292 { /* likely */ }
293 else
294 fFlags |= IEMTB_F_CS_LIM_CHECKS;
295 }
296 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
297
298 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
299 {
300 /*
301 * Do the TLB lookup for flat RIP and compare the result with the next TB.
302 *
303 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
304 */
305 /* Calc the effective PC. */
306 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
307 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
308 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
309
310 /* Advance within the current buffer (PAGE) when possible. */
311 RTGCPHYS GCPhysPc;
312 uint64_t off;
313 if ( pVCpu->iem.s.pbInstrBuf
314 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
315 {
316 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
317 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
318 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
319 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
320 else
321 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
322 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
323 }
324 else
325 {
326 pVCpu->iem.s.pbInstrBuf = NULL;
327 pVCpu->iem.s.offCurInstrStart = 0;
328 pVCpu->iem.s.offInstrNextByte = 0;
329 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
330 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
331 }
332
333 if (pNewTb->GCPhysPc == GCPhysPc)
334 {
335 /*
336 * Check for interrupts and stuff.
337 */
338 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
339 * The main problems are the statistics and, to some degree, the logging. :/ */
340 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
341 {
342 /* Do polling. */
343 uint64_t const cTbExecNative = pVCpu->iem.s.cTbExecNative;
344 if ( RT_LIKELY(cTbExecNative & 511)
345 || !TMTimerPollBoolWith32BitMilliTS(pVCpu->CTX_SUFF(pVM), pVCpu, &pVCpu->iem.s.msRecompilerPollNow) )
346 {
347 /*
348 * Success. Update statistics and switch to the next TB.
349 */
350 pVCpu->iem.s.cTbExecNative = cTbExecNative + 1;
351 if (a_fWithIrqCheck)
352 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
353 else
354 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
355
356 pNewTb->cUsed += 1;
357 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
358 pVCpu->iem.s.pCurTbR3 = pNewTb;
359 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
360 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
361 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
362 return (uintptr_t)pNewTb->Native.paInstructions;
363 }
364 }
365 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
366 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
367 }
368 else
369 {
370 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
371 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
372 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
373 }
374 }
375 else
376 {
377 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
378 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
379 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
380 }
381 }
382 else
383 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
384#else
385 NOREF(fFlags);
386 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
387#endif
388
389 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
390 return 0;
391}
392
393
394/**
395 * Used by TB code when it wants to raise a \#DE.
396 */
397IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
398{
399 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
400 iemRaiseDivideErrorJmp(pVCpu);
401#ifndef _MSC_VER
402 return VINF_IEM_RAISED_XCPT; /* not reached */
403#endif
404}
405
406
407/**
408 * Used by TB code when it wants to raise a \#UD.
409 */
410IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
411{
412 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
413 iemRaiseUndefinedOpcodeJmp(pVCpu);
414#ifndef _MSC_VER
415 return VINF_IEM_RAISED_XCPT; /* not reached */
416#endif
417}
418
419
420/**
421 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
422 *
423 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
424 */
425IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
426{
427 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
428 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
429 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
430 iemRaiseUndefinedOpcodeJmp(pVCpu);
431 else
432 iemRaiseDeviceNotAvailableJmp(pVCpu);
433#ifndef _MSC_VER
434 return VINF_IEM_RAISED_XCPT; /* not reached */
435#endif
436}
437
438
439/**
440 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
441 *
442 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
443 */
444IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
445{
446 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
447 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
448 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
449 iemRaiseUndefinedOpcodeJmp(pVCpu);
450 else
451 iemRaiseDeviceNotAvailableJmp(pVCpu);
452#ifndef _MSC_VER
453 return VINF_IEM_RAISED_XCPT; /* not reached */
454#endif
455}
456
457
458/**
459 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
460 *
461 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
462 */
463IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
464{
465 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
466 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
467 iemRaiseSimdFpExceptionJmp(pVCpu);
468 else
469 iemRaiseUndefinedOpcodeJmp(pVCpu);
470#ifndef _MSC_VER
471 return VINF_IEM_RAISED_XCPT; /* not reached */
472#endif
473}
474
475
476/**
477 * Used by TB code when it wants to raise a \#NM.
478 */
479IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
480{
481 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
482 iemRaiseDeviceNotAvailableJmp(pVCpu);
483#ifndef _MSC_VER
484 return VINF_IEM_RAISED_XCPT; /* not reached */
485#endif
486}
487
488
489/**
490 * Used by TB code when it wants to raise a \#GP(0).
491 */
492IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
493{
494 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
495 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
496#ifndef _MSC_VER
497 return VINF_IEM_RAISED_XCPT; /* not reached */
498#endif
499}
500
501
502/**
503 * Used by TB code when it wants to raise a \#MF.
504 */
505IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
506{
507 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
508 iemRaiseMathFaultJmp(pVCpu);
509#ifndef _MSC_VER
510 return VINF_IEM_RAISED_XCPT; /* not reached */
511#endif
512}
513
514
515/**
516 * Used by TB code when it wants to raise a \#XF.
517 */
518IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
519{
520 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
521 iemRaiseSimdFpExceptionJmp(pVCpu);
522#ifndef _MSC_VER
523 return VINF_IEM_RAISED_XCPT; /* not reached */
524#endif
525}
526
527
528/**
529 * Used by TB code when detecting opcode changes.
530 * @see iemThreadedFuncWorkerObsoleteTb
531 */
532IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
533{
534 /* We set fSafeToFree to false because we're being called in the context
535 of a TB callback function, which for native TBs means we cannot release
536 the executable memory until we've returned our way back to iemTbExec, as
537 that return path goes via the native code generated for the TB. */
538 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
539 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
540 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
541 return VINF_IEM_REEXEC_BREAK;
542}
543
544
545/**
546 * Used by TB code when we need to switch to a TB with CS.LIM checking.
547 */
548IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
549{
550 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
551 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
552 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
553 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
554 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
555 return VINF_IEM_REEXEC_BREAK;
556}
557
558
559/**
560 * Used by TB code when we missed a PC check after a branch.
561 */
562IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
563{
564 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
565 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
566 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
567 pVCpu->iem.s.pbInstrBuf));
568 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
569 return VINF_IEM_REEXEC_BREAK;
570}
571
572
573
574/*********************************************************************************************************************************
575* Helpers: Segmented memory fetches and stores. *
576*********************************************************************************************************************************/
577
578/**
579 * Used by TB code to load unsigned 8-bit data w/ segmentation.
580 */
581IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
582{
583#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
584 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
585#else
586 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
587#endif
588}
589
590
591/**
592 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
593 * to 16 bits.
594 */
595IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
596{
597#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
598 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
599#else
600 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
601#endif
602}
603
604
605/**
606 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
607 * to 32 bits.
608 */
609IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
610{
611#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
612 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
613#else
614 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
615#endif
616}
617
618/**
619 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
620 * to 64 bits.
621 */
622IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
623{
624#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
625 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
626#else
627 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
628#endif
629}
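/*
 * Worked example for the sign-then-zero-extending cast chains above: fetching
 * the byte 0x80 (-128) in the 16-bit case evaluates as
 *   (int8_t)0x80 = -128  ->  (int16_t)-128 = 0xff80 (as uint16_t)
 *   ->  (uint64_t)0x000000000000ff80,
 * i.e. the value is sign extended to the target width and then zero extended
 * to 64 bits, so the caller always receives a fully defined 64-bit value.
 */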
630
631
632/**
633 * Used by TB code to load unsigned 16-bit data w/ segmentation.
634 */
635IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
636{
637#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
638 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
639#else
640 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
641#endif
642}
643
644
645/**
646 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
647 * to 32 bits.
648 */
649IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
650{
651#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
652 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
653#else
654 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
655#endif
656}
657
658
659/**
660 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
661 * to 64 bits.
662 */
663IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
664{
665#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
666 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
667#else
668 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
669#endif
670}
671
672
673/**
674 * Used by TB code to load unsigned 32-bit data w/ segmentation.
675 */
676IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
677{
678#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
679 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
680#else
681 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
682#endif
683}
684
685
686/**
687 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
688 * to 64 bits.
689 */
690IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
691{
692#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
693 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
694#else
695 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
696#endif
697}
698
699
700/**
701 * Used by TB code to load unsigned 64-bit data w/ segmentation.
702 */
703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
704{
705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
706 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
707#else
708 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
709#endif
710}
711
712
713#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
714/**
715 * Used by TB code to load 128-bit data w/ segmentation.
716 */
717IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
718{
719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
720 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
721#else
722 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
723#endif
724}
725
726
727/**
728 * Used by TB code to load 128-bit data w/ segmentation.
729 */
730IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
731{
732#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
733 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
734#else
735 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
736#endif
737}
738
739
740/**
741 * Used by TB code to load 128-bit data w/ segmentation.
742 */
743IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
744{
745#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
746 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
747#else
748 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
749#endif
750}
751
752
753/**
754 * Used by TB code to load 256-bit data w/ segmentation.
755 */
756IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
757{
758#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
759 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
760#else
761 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
762#endif
763}
764
765
766/**
767 * Used by TB code to load 256-bit data w/ segmentation.
768 */
769IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
770{
771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
772 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
773#else
774 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
775#endif
776}
777#endif
778
779
780/**
781 * Used by TB code to store unsigned 8-bit data w/ segmentation.
782 */
783IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
784{
785#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
786 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
787#else
788 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
789#endif
790}
791
792
793/**
794 * Used by TB code to store unsigned 16-bit data w/ segmentation.
795 */
796IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
797{
798#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
799 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
800#else
801 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
802#endif
803}
804
805
806/**
807 * Used by TB code to store unsigned 32-bit data w/ segmentation.
808 */
809IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
810{
811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
812 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
813#else
814 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
815#endif
816}
817
818
819/**
820 * Used by TB code to store unsigned 64-bit data w/ segmentation.
821 */
822IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
823{
824#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
825 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
826#else
827 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
828#endif
829}
830
831
832#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
833/**
834 * Used by TB code to store unsigned 128-bit data w/ segmentation.
835 */
836IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
837{
838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
839 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
840#else
841 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
842#endif
843}
844
845
846/**
847 * Used by TB code to store unsigned 128-bit data w/ segmentation.
848 */
849IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
850{
851#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
852 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
853#else
854 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
855#endif
856}
857
858
859/**
860 * Used by TB code to store unsigned 256-bit data w/ segmentation.
861 */
862IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
863{
864#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
865 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
866#else
867 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
868#endif
869}
870
871
872/**
873 * Used by TB code to store unsigned 256-bit data w/ segmentation.
874 */
875IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
876{
877#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
878 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
879#else
880 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
881#endif
882}
883#endif
884
885
886
887/**
888 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
889 */
890IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
891{
892#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
893 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
894#else
895 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
896#endif
897}
898
899
900/**
901 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
902 */
903IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
904{
905#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
906 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
907#else
908 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
909#endif
910}
911
912
913/**
914 * Used by TB code to store a 32-bit selector value onto a generic stack.
915 *
916 * Intel CPUs don't write a whole dword, hence this special function.
917 */
918IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
919{
920#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
921 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
922#else
923 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
924#endif
925}
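/*
 * Informal example of the behaviour noted above: with a 32-bit operand size,
 * pushing a segment register on the CPUs modelled here writes only the 16-bit
 * selector into the low word of the 4-byte stack slot and leaves the upper
 * word of that slot untouched, rather than zero extending it the way a plain
 * 32-bit store would.
 */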
926
927
928/**
929 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
930 */
931IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
932{
933#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
934 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
935#else
936 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
937#endif
938}
939
940
941/**
942 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
943 */
944IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
945{
946#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
947 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
948#else
949 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
950#endif
951}
952
953
954/**
955 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
956 */
957IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
958{
959#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
960 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
961#else
962 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
963#endif
964}
965
966
967/**
968 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
969 */
970IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
971{
972#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
973 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
974#else
975 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
976#endif
977}
978
979
980
981/*********************************************************************************************************************************
982* Helpers: Flat memory fetches and stores. *
983*********************************************************************************************************************************/
984
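/*
 * Note on the flat variants below (informal): these are used when the guest
 * address is already flat/linear, so no segment base or limit is applied.  On
 * the TLB-lookup configured paths they reuse the segmented safe workers with
 * UINT8_MAX as the segment register index, which here means "no segment".
 */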
985/**
986 * Used by TB code to load unsigned 8-bit data w/ flat address.
987 * @note Zero extending the value to 64-bit to simplify assembly.
988 */
989IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
990{
991#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
992 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
993#else
994 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
995#endif
996}
997
998
999/**
1000 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1001 * to 16 bits.
1002 * @note Zero extending the value to 64-bit to simplify assembly.
1003 */
1004IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1005{
1006#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1007 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1008#else
1009 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1010#endif
1011}
1012
1013
1014/**
1015 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1016 * to 32 bits.
1017 * @note Zero extending the value to 64-bit to simplify assembly.
1018 */
1019IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1020{
1021#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1022 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1023#else
1024 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1025#endif
1026}
1027
1028
1029/**
1030 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1031 * to 64 bits.
1032 */
1033IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1034{
1035#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1036 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1037#else
1038 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1039#endif
1040}
1041
1042
1043/**
1044 * Used by TB code to load unsigned 16-bit data w/ flat address.
1045 * @note Zero extending the value to 64-bit to simplify assembly.
1046 */
1047IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1048{
1049#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1050 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1051#else
1052 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1053#endif
1054}
1055
1056
1057/**
1058 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1059 * to 32 bits.
1060 * @note Zero extending the value to 64-bit to simplify assembly.
1061 */
1062IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1063{
1064#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1065 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1066#else
1067 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1068#endif
1069}
1070
1071
1072/**
1073 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1074 * to 64 bits.
1075 * @note Zero extending the value to 64-bit to simplify assembly.
1076 */
1077IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1078{
1079#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1080 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1081#else
1082 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1083#endif
1084}
1085
1086
1087/**
1088 * Used by TB code to load unsigned 32-bit data w/ flat address.
1089 * @note Zero extending the value to 64-bit to simplify assembly.
1090 */
1091IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1092{
1093#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1094 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1095#else
1096 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1097#endif
1098}
1099
1100
1101/**
1102 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1103 * to 64 bits.
1104 * @note Zero extending the value to 64-bit to simplify assembly.
1105 */
1106IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1107{
1108#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1109 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1110#else
1111 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1112#endif
1113}
1114
1115
1116/**
1117 * Used by TB code to load unsigned 64-bit data w/ flat address.
1118 */
1119IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1120{
1121#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1122 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1123#else
1124 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1125#endif
1126}
1127
1128
1129#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1130/**
1131 * Used by TB code to load unsigned 128-bit data w/ flat address.
1132 */
1133IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1134{
1135#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1136 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1137#else
1138 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1139#endif
1140}
1141
1142
1143/**
1144 * Used by TB code to load unsigned 128-bit data w/ flat address.
1145 */
1146IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1147{
1148#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1149 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1150#else
1151 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1152#endif
1153}
1154
1155
1156/**
1157 * Used by TB code to load unsigned 128-bit data w/ flat address.
1158 */
1159IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1160{
1161#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1162 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1163#else
1164 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1165#endif
1166}
1167
1168
1169/**
1170 * Used by TB code to load unsigned 256-bit data w/ flat address.
1171 */
1172IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1173{
1174#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1175 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1176#else
1177 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1178#endif
1179}
1180
1181
1182/**
1183 * Used by TB code to load unsigned 256-bit data w/ flat address.
1184 */
1185IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1186{
1187#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1188 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1189#else
1190 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1191#endif
1192}
1193#endif
1194
1195
1196/**
1197 * Used by TB code to store unsigned 8-bit data w/ flat address.
1198 */
1199IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1200{
1201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1202 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1203#else
1204 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1205#endif
1206}
1207
1208
1209/**
1210 * Used by TB code to store unsigned 16-bit data w/ flat address.
1211 */
1212IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1213{
1214#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1215 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1216#else
1217 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1218#endif
1219}
1220
1221
1222/**
1223 * Used by TB code to store unsigned 32-bit data w/ flat address.
1224 */
1225IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1226{
1227#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1228 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1229#else
1230 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1231#endif
1232}
1233
1234
1235/**
1236 * Used by TB code to store unsigned 64-bit data w/ flat address.
1237 */
1238IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1239{
1240#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1241 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1242#else
1243 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1244#endif
1245}
1246
1247
1248#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1249/**
1250 * Used by TB code to store unsigned 128-bit data w/ flat address.
1251 */
1252IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1253{
1254#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1255 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1256#else
1257 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1258#endif
1259}
1260
1261
1262/**
1263 * Used by TB code to store unsigned 128-bit data w/ flat address.
1264 */
1265IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1266{
1267#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1268 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1269#else
1270 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1271#endif
1272}
1273
1274
1275/**
1276 * Used by TB code to store unsigned 256-bit data w/ flat address.
1277 */
1278IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1279{
1280#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1281 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1282#else
1283 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1284#endif
1285}
1286
1287
1288/**
1289 * Used by TB code to store unsigned 256-bit data w/ flat address.
1290 */
1291IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1292{
1293#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1294 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1295#else
1296 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1297#endif
1298}
1299#endif
1300
1301
1302
1303/**
1304 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1305 */
1306IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1307{
1308#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1309 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1310#else
1311 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1312#endif
1313}
1314
1315
1316/**
1317 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1318 */
1319IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1320{
1321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1322 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1323#else
1324 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1325#endif
1326}
1327
1328
1329/**
1330 * Used by TB code to store a segment selector value onto a flat stack.
1331 *
1332 * Intel CPUs don't write a whole dword, hence this special function.
1333 */
1334IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1335{
1336#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1337 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1338#else
1339 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1340#endif
1341}
1342
1343
1344/**
1345 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1346 */
1347IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1348{
1349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1350 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1351#else
1352 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1353#endif
1354}
1355
1356
1357/**
1358 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1359 */
1360IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1361{
1362#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1363 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1364#else
1365 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1366#endif
1367}
1368
1369
1370/**
1371 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1372 */
1373IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1374{
1375#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1376 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1377#else
1378 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1379#endif
1380}
1381
1382
1383/**
1384 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1385 */
1386IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1387{
1388#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1389 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1390#else
1391 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1392#endif
1393}
1394
1395
1396
1397/*********************************************************************************************************************************
1398* Helpers: Segmented memory mapping. *
1399*********************************************************************************************************************************/
1400
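/*
 * Note on the mapping helpers below (informal): each returns a host pointer
 * into the guest page and fills *pbUnmapInfo with a small token that the TB
 * code later hands to the matching commit/unmap helper, so every map call is
 * paired with an unmap once the access has been carried out.
 */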
1401/**
1402 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1403 * segmentation.
1404 */
1405IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1406 RTGCPTR GCPtrMem, uint8_t iSegReg))
1407{
1408#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1409 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1410#else
1411 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1412#endif
1413}
1414
1415
1416/**
1417 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1418 */
1419IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1420 RTGCPTR GCPtrMem, uint8_t iSegReg))
1421{
1422#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1423 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1424#else
1425 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1426#endif
1427}
1428
1429
1430/**
1431 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1432 */
1433IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1434 RTGCPTR GCPtrMem, uint8_t iSegReg))
1435{
1436#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1437 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1438#else
1439 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1440#endif
1441}
1442
1443
1444/**
1445 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1446 */
1447IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1448 RTGCPTR GCPtrMem, uint8_t iSegReg))
1449{
1450#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1451 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1452#else
1453 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1454#endif
1455}
1456
1457
1458/**
1459 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1460 * segmentation.
1461 */
1462IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1463 RTGCPTR GCPtrMem, uint8_t iSegReg))
1464{
1465#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1466 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1467#else
1468 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1469#endif
1470}
1471
1472
1473/**
1474 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1475 */
1476IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1477 RTGCPTR GCPtrMem, uint8_t iSegReg))
1478{
1479#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1480 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1481#else
1482 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1483#endif
1484}
1485
1486
1487/**
1488 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1489 */
1490IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1491 RTGCPTR GCPtrMem, uint8_t iSegReg))
1492{
1493#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1494 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1495#else
1496 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1497#endif
1498}
1499
1500
1501/**
1502 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1503 */
1504IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1505 RTGCPTR GCPtrMem, uint8_t iSegReg))
1506{
1507#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1508 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1509#else
1510 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1511#endif
1512}
1513
1514
1515/**
1516 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1517 * segmentation.
1518 */
1519IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1520 RTGCPTR GCPtrMem, uint8_t iSegReg))
1521{
1522#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1523 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1524#else
1525 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1526#endif
1527}
1528
1529
1530/**
1531 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1532 */
1533IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1534 RTGCPTR GCPtrMem, uint8_t iSegReg))
1535{
1536#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1537 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1538#else
1539 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1540#endif
1541}
1542
1543
1544/**
1545 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1546 */
1547IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1548 RTGCPTR GCPtrMem, uint8_t iSegReg))
1549{
1550#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1551 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1552#else
1553 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1554#endif
1555}
1556
1557
1558/**
1559 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1560 */
1561IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1562 RTGCPTR GCPtrMem, uint8_t iSegReg))
1563{
1564#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1565 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1566#else
1567 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1568#endif
1569}
1570
1571
1572/**
1573 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1574 * segmentation.
1575 */
1576IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1577 RTGCPTR GCPtrMem, uint8_t iSegReg))
1578{
1579#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1580 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1581#else
1582 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1583#endif
1584}
1585
1586
1587/**
1588 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1591 RTGCPTR GCPtrMem, uint8_t iSegReg))
1592{
1593#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1594 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1595#else
1596 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1597#endif
1598}
1599
1600
1601/**
1602 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1603 */
1604IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1605 RTGCPTR GCPtrMem, uint8_t iSegReg))
1606{
1607#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1608 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1609#else
1610 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1611#endif
1612}
1613
1614
1615/**
1616 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1619 RTGCPTR GCPtrMem, uint8_t iSegReg))
1620{
1621#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1622 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1623#else
1624 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1625#endif
1626}
1627
1628
1629/**
1630 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1633 RTGCPTR GCPtrMem, uint8_t iSegReg))
1634{
1635#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1636 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1637#else
1638 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1639#endif
1640}
1641
1642
1643/**
1644 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1645 */
1646IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1647 RTGCPTR GCPtrMem, uint8_t iSegReg))
1648{
1649#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1650 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1651#else
1652 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1653#endif
1654}
1655
1656
1657/**
1658 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1659 * segmentation.
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1662 RTGCPTR GCPtrMem, uint8_t iSegReg))
1663{
1664#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1665 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1666#else
1667 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1668#endif
1669}
1670
1671
1672/**
1673 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1674 */
1675IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1676 RTGCPTR GCPtrMem, uint8_t iSegReg))
1677{
1678#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1679 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1680#else
1681 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1682#endif
1683}
1684
1685
1686/**
1687 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1688 */
1689IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1690 RTGCPTR GCPtrMem, uint8_t iSegReg))
1691{
1692#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1693 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1694#else
1695 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1696#endif
1697}
1698
1699
1700/**
1701 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1702 */
1703IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1704 RTGCPTR GCPtrMem, uint8_t iSegReg))
1705{
1706#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1707 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1708#else
1709 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1710#endif
1711}
1712
1713
1714/*********************************************************************************************************************************
1715* Helpers: Flat memory mapping. *
1716*********************************************************************************************************************************/
1717
1718/**
1719 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1720 * address.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1723{
1724#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1725 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1726#else
1727 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1728#endif
1729}
1730
1731
1732/**
1733 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1734 */
1735IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1736{
1737#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1738 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1739#else
1740 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1741#endif
1742}
1743
1744
1745/**
1746 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1747 */
1748IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1749{
1750#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1751 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1752#else
1753 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1754#endif
1755}
1756
1757
1758/**
1759 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1760 */
1761IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1762{
1763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1764 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1765#else
1766 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1767#endif
1768}
1769
1770
1771/**
1772 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1773 * address.
1774 */
1775IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1776{
1777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1778 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1779#else
1780 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1781#endif
1782}
1783
1784
1785/**
1786 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1787 */
1788IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1789{
1790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1791 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1792#else
1793 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1794#endif
1795}
1796
1797
1798/**
1799 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1800 */
1801IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1802{
1803#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1804 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1805#else
1806 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1807#endif
1808}
1809
1810
1811/**
1812 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1813 */
1814IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1815{
1816#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1817 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1818#else
1819 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1820#endif
1821}
1822
1823
1824/**
1825 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1826 * address.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1829{
1830#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1831 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1832#else
1833 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1834#endif
1835}
1836
1837
1838/**
1839 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1840 */
1841IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1842{
1843#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1844 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1845#else
1846 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1847#endif
1848}
1849
1850
1851/**
1852 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1853 */
1854IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1855{
1856#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1857 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1858#else
1859 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1860#endif
1861}
1862
1863
1864/**
1865 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1870 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1871#else
1872 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1873#endif
1874}
1875
1876
1877/**
1878 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1879 * address.
1880 */
1881IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1882{
1883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1884 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1885#else
1886 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1887#endif
1888}
1889
1890
1891/**
1892 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1895{
1896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1897 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1898#else
1899 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1900#endif
1901}
1902
1903
1904/**
1905 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1906 */
1907IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1908{
1909#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1910 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1911#else
1912 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1913#endif
1914}
1915
1916
1917/**
1918 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1919 */
1920IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1921{
1922#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1923 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1924#else
1925 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1926#endif
1927}
1928
1929
1930/**
1931 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1932 */
1933IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1934{
1935#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1936 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1937#else
1938 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1939#endif
1940}
1941
1942
1943/**
1944 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1945 */
1946IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1947{
1948#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1949 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1950#else
1951 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1952#endif
1953}
1954
1955
1956/**
1957 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1958 * address.
1959 */
1960IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1961{
1962#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1963 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1964#else
1965 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1966#endif
1967}
1968
1969
1970/**
1971 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1974{
1975#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1976 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1977#else
1978 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1979#endif
1980}
1981
1982
1983/**
1984 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1985 */
1986IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1987{
1988#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1989 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1990#else
1991 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1992#endif
1993}
1994
1995
1996/**
1997 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1998 */
1999IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2000{
2001#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2002 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2003#else
2004 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2005#endif
2006}
2007
2008
2009/*********************************************************************************************************************************
2010* Helpers: Commit, rollback & unmap *
2011*********************************************************************************************************************************/
2012
2013/**
2014 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2015 */
2016IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2017{
2018 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2019}
2020
2021
2022/**
2023 * Used by TB code to commit and unmap a read-write memory mapping.
2024 */
2025IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2026{
2027 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2028}
2029
2030
2031/**
2032 * Used by TB code to commit and unmap a write-only memory mapping.
2033 */
2034IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2035{
2036 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2037}
2038
2039
2040/**
2041 * Used by TB code to commit and unmap a read-only memory mapping.
2042 */
2043IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2044{
2045 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2046}
2047
2048
2049/**
2050 * Reinitializes the native recompiler state.
2051 *
2052 * Called before starting a new recompile job.
2053 */
2054static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2055{
2056 pReNative->cLabels = 0;
2057 pReNative->bmLabelTypes = 0;
2058 pReNative->cFixups = 0;
2059#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2060 pReNative->pDbgInfo->cEntries = 0;
2061 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2062#endif
2063 pReNative->pTbOrg = pTb;
2064 pReNative->cCondDepth = 0;
2065 pReNative->uCondSeqNo = 0;
2066 pReNative->uCheckIrqSeqNo = 0;
2067 pReNative->uTlbSeqNo = 0;
2068
2069#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2070 pReNative->Core.offPc = 0;
2071 pReNative->Core.cInstrPcUpdateSkipped = 0;
2072#endif
2073#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2074 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2075#endif
2076 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2077#if IEMNATIVE_HST_GREG_COUNT < 32
2078 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2079#endif
2080 ;
2081 pReNative->Core.bmHstRegsWithGstShadow = 0;
2082 pReNative->Core.bmGstRegShadows = 0;
2083#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2084 pReNative->Core.bmGstRegShadowDirty = 0;
2085#endif
2086 pReNative->Core.bmVars = 0;
2087 pReNative->Core.bmStack = 0;
2088 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2089 pReNative->Core.u64ArgVars = UINT64_MAX;
2090
2091 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 22);
2092 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2093 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2094 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2095 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2096 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2097 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2098 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2099 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2100 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2101 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2102 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2103 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2104 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2105 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2106 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2107 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2108 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2110 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2111 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2112 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2113 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2114
2115 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2116
2117 /* Full host register reinit: */
2118 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2119 {
2120 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2121 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2122 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2123 }
2124
2125 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2126 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2127#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2128 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2129#endif
2130#ifdef IEMNATIVE_REG_FIXED_TMP0
2131 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2132#endif
2133#ifdef IEMNATIVE_REG_FIXED_TMP1
2134 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2135#endif
2136#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2137 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2138#endif
2139 );
2140 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2141 {
2142 fRegs &= ~RT_BIT_32(idxReg);
2143 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2144 }
2145
2146 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2147#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2148 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2149#endif
2150#ifdef IEMNATIVE_REG_FIXED_TMP0
2151 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2152#endif
2153#ifdef IEMNATIVE_REG_FIXED_TMP1
2154 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2155#endif
2156#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2157 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2158#endif
2159
2160#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2161 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2162# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2163 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2164# endif
2165 ;
2166 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2167 pReNative->Core.bmGstSimdRegShadows = 0;
2168 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2169 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2170
2171 /* Full host register reinit: */
2172 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2173 {
2174 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2175 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2176 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2177 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2178 }
2179
2180 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2181 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2182 {
2183 fRegs &= ~RT_BIT_32(idxReg);
2184 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2185 }
2186
2187#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2188 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2189#endif
2190
2191#endif
2192
2193 return pReNative;
2194}
2195
2196
2197/**
2198 * Allocates and initializes the native recompiler state.
2199 *
2200 * This is called the first time an EMT wants to recompile something.
2201 *
2202 * @returns Pointer to the new recompiler state.
2203 * @param pVCpu The cross context virtual CPU structure of the calling
2204 * thread.
2205 * @param pTb The TB that's about to be recompiled.
2206 * @thread EMT(pVCpu)
2207 */
2208static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2209{
2210 VMCPU_ASSERT_EMT(pVCpu);
2211
2212 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2213 AssertReturn(pReNative, NULL);
2214
2215 /*
2216 * Try allocate all the buffers and stuff we need.
2217 */
2218 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2219 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
2220 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
2221#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2222 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
2223#endif
2224 if (RT_LIKELY( pReNative->pInstrBuf
2225 && pReNative->paLabels
2226 && pReNative->paFixups)
2227#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2228 && pReNative->pDbgInfo
2229#endif
2230 )
2231 {
2232 /*
2233 * Set the buffer & array sizes on success.
2234 */
2235 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2236 pReNative->cLabelsAlloc = _8K;
2237 pReNative->cFixupsAlloc = _16K;
2238#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2239 pReNative->cDbgInfoAlloc = _16K;
2240#endif
2241
2242 /* Other constant stuff: */
2243 pReNative->pVCpu = pVCpu;
2244
2245 /*
2246 * Done, just need to save it and reinit it.
2247 */
2248 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
2249 return iemNativeReInit(pReNative, pTb);
2250 }
2251
2252 /*
2253 * Failed. Cleanup and return.
2254 */
2255 AssertFailed();
2256 RTMemFree(pReNative->pInstrBuf);
2257 RTMemFree(pReNative->paLabels);
2258 RTMemFree(pReNative->paFixups);
2259#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2260 RTMemFree(pReNative->pDbgInfo);
2261#endif
2262 RTMemFree(pReNative);
2263 return NULL;
2264}
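
/*
 * Illustrative sketch only (not part of the build): how a recompile entry
 * point might combine the two functions above - reuse an already allocated
 * per-EMT state via iemNativeReInit() and fall back to iemNativeInit() on
 * first use. The wrapper name and the exact caller-side flow are assumptions.
 */
#if 0
static PIEMRECOMPILERSTATE iemNativeSketchGetStateForRecompile(PVMCPUCC pVCpu, PCIEMTB pTb)
{
    /* Reuse the state left behind by a previous recompile job, if any... */
    PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    if (RT_LIKELY(pReNative))
        return iemNativeReInit(pReNative, pTb);
    /* ...otherwise allocate and initialize it now (returns NULL on failure). */
    return iemNativeInit(pVCpu, pTb);
}
#endif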
2265
2266
2267/**
2268 * Creates a label
2269 *
2270 * If the label does not yet have a defined position,
2271 * call iemNativeLabelDefine() later to set it.
2272 *
2273 * @returns Label ID. Throws VBox status code on failure, so no need to check
2274 * the return value.
2275 * @param pReNative The native recompile state.
2276 * @param enmType The label type.
2277 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2278 * label is not yet defined (default).
2279 * @param uData Data associated with the label. Only applicable to
2280 * certain types of labels. Default is zero.
2281 */
2282DECL_HIDDEN_THROW(uint32_t)
2283iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2284 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2285{
2286 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2287
2288 /*
2289 * Locate existing label definition.
2290 *
2291 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2292 * and uData is zero.
2293 */
2294 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2295 uint32_t const cLabels = pReNative->cLabels;
2296 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2297#ifndef VBOX_STRICT
2298 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2299 && offWhere == UINT32_MAX
2300 && uData == 0
2301#endif
2302 )
2303 {
2304#ifndef VBOX_STRICT
2305 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2306 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2307 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2308 if (idxLabel < pReNative->cLabels)
2309 return idxLabel;
2310#else
2311 for (uint32_t i = 0; i < cLabels; i++)
2312 if ( paLabels[i].enmType == enmType
2313 && paLabels[i].uData == uData)
2314 {
2315 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2316 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2317 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2318 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2319 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2320 return i;
2321 }
2322 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2323 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2324#endif
2325 }
2326
2327 /*
2328 * Make sure we've got room for another label.
2329 */
2330 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2331 { /* likely */ }
2332 else
2333 {
2334 uint32_t cNew = pReNative->cLabelsAlloc;
2335 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2336 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2337 cNew *= 2;
2338 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
2339 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2340 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2341 pReNative->paLabels = paLabels;
2342 pReNative->cLabelsAlloc = cNew;
2343 }
2344
2345 /*
2346 * Define a new label.
2347 */
2348 paLabels[cLabels].off = offWhere;
2349 paLabels[cLabels].enmType = enmType;
2350 paLabels[cLabels].uData = uData;
2351 pReNative->cLabels = cLabels + 1;
2352
2353 Assert((unsigned)enmType < 64);
2354 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2355
2356 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2357 {
2358 Assert(uData == 0);
2359 pReNative->aidxUniqueLabels[enmType] = cLabels;
2360 }
2361
2362 if (offWhere != UINT32_MAX)
2363 {
2364#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2365 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2366 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2367#endif
2368 }
2369 return cLabels;
2370}
2371
2372
2373/**
2374 * Defines the location of an existing label.
2375 *
2376 * @param pReNative The native recompile state.
2377 * @param idxLabel The label to define.
2378 * @param offWhere The position.
2379 */
2380DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2381{
2382 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2383 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2384 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2385 pLabel->off = offWhere;
2386#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2387 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2388 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2389#endif
2390}
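
/*
 * Illustrative sketch only (not built): the forward-label pattern enabled by
 * iemNativeLabelCreate() and iemNativeLabelDefine() above. The label type is
 * a placeholder and the surrounding emitter calls are only hinted at.
 */
#if 0
    /* While emitting a forward branch whose target isn't known yet: */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType); /* offWhere defaults to UINT32_MAX */
    /* ... emit the branch and record a fixup referencing idxLabel ... */

    /* Later, once the target code offset 'off' has been reached: */
    iemNativeLabelDefine(pReNative, idxLabel, off);
#endif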
2391
2392
2393/**
2394 * Looks up a label.
2395 *
2396 * @returns Label ID if found, UINT32_MAX if not.
2397 */
2398static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2399 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2400{
2401 Assert((unsigned)enmType < 64);
2402 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2403 {
2404 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2405 return pReNative->aidxUniqueLabels[enmType];
2406
2407 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2408 uint32_t const cLabels = pReNative->cLabels;
2409 for (uint32_t i = 0; i < cLabels; i++)
2410 if ( paLabels[i].enmType == enmType
2411 && paLabels[i].uData == uData
2412 && ( paLabels[i].off == offWhere
2413 || offWhere == UINT32_MAX
2414 || paLabels[i].off == UINT32_MAX))
2415 return i;
2416 }
2417 return UINT32_MAX;
2418}
2419
2420
2421/**
2422 * Adds a fixup.
2423 *
2424 * @throws VBox status code (int) on failure.
2425 * @param pReNative The native recompile state.
2426 * @param offWhere The instruction offset of the fixup location.
2427 * @param idxLabel The target label ID for the fixup.
2428 * @param enmType The fixup type.
2429 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2430 */
2431DECL_HIDDEN_THROW(void)
2432iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2433 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2434{
2435 Assert(idxLabel <= UINT16_MAX);
2436 Assert((unsigned)enmType <= UINT8_MAX);
2437#ifdef RT_ARCH_ARM64
2438 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2439 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2440 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2441#endif
2442
2443 /*
2444 * Make sure we've got room.
2445 */
2446 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2447 uint32_t const cFixups = pReNative->cFixups;
2448 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2449 { /* likely */ }
2450 else
2451 {
2452 uint32_t cNew = pReNative->cFixupsAlloc;
2453 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2454 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2455 cNew *= 2;
2456 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2457 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2458 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2459 pReNative->paFixups = paFixups;
2460 pReNative->cFixupsAlloc = cNew;
2461 }
2462
2463 /*
2464 * Add the fixup.
2465 */
2466 paFixups[cFixups].off = offWhere;
2467 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2468 paFixups[cFixups].enmType = enmType;
2469 paFixups[cFixups].offAddend = offAddend;
2470 pReNative->cFixups = cFixups + 1;
2471}
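
/*
 * Illustrative sketch only (not built): how a fixup record pairs with a label.
 * The fixup type is a placeholder; the real emitters (e.g. the jump helpers in
 * IEMN8veRecompilerEmit.h) are expected to wrap this pattern.
 */
#if 0
    /* 'off' points at the just emitted branch instruction that needs patching
       to the final position of idxLabel when the TB is finalized: */
    iemNativeAddFixup(pReNative, off, idxLabel, enmSomeFixupType); /* offAddend defaults to 0 */
#endif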
2472
2473
2474/**
2475 * Slow code path for iemNativeInstrBufEnsure.
2476 */
2477DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2478{
2479 /* Double the buffer size till we meet the request. */
2480 uint32_t cNew = pReNative->cInstrBufAlloc;
2481 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2482 do
2483 cNew *= 2;
2484 while (cNew < off + cInstrReq);
2485
2486 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2487#ifdef RT_ARCH_ARM64
2488 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2489#else
2490 uint32_t const cbMaxInstrBuf = _2M;
2491#endif
2492 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2493
2494 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2495 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2496
2497#ifdef VBOX_STRICT
2498 pReNative->offInstrBufChecked = off + cInstrReq;
2499#endif
2500 pReNative->cInstrBufAlloc = cNew;
2501 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2502}
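
/*
 * Illustrative sketch only (not built): emitters are assumed to call the fast
 * path iemNativeInstrBufEnsure() before writing, which only ends up in the
 * slow path above when the current allocation is too small.
 */
#if 0
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4 /*cInstrReq*/);
    /* ... write up to 4 instruction units at pCodeBuf[off], pCodeBuf[off + 1], ... */
#endif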
2503
2504#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2505
2506/**
2507 * Grows the static debug info array used during recompilation.
2508 *
2509 * @returns Pointer to the new debug info block; throws VBox status code on
2510 * failure, so no need to check the return value.
2511 */
2512DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2513{
2514 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2515 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2516 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2517 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2518 pReNative->pDbgInfo = pDbgInfo;
2519 pReNative->cDbgInfoAlloc = cNew;
2520 return pDbgInfo;
2521}
2522
2523
2524/**
2525 * Adds a new debug info uninitialized entry, returning the pointer to it.
2526 */
2527DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2528{
2529 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2530 { /* likely */ }
2531 else
2532 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2533 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2534}
2535
2536
2537/**
2538 * Debug Info: Adds a native offset record, if necessary.
2539 */
2540DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2541{
2542 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2543
2544 /*
2545 * Do we need this one?
2546 */
2547 uint32_t const offPrev = pDbgInfo->offNativeLast;
2548 if (offPrev == off)
2549 return;
2550 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2551
2552 /*
2553 * Add it.
2554 */
2555 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2556 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2557 pEntry->NativeOffset.offNative = off;
2558 pDbgInfo->offNativeLast = off;
2559}
2560
2561
2562/**
2563 * Debug Info: Record info about a label.
2564 */
2565static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2566{
2567 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2568 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2569 pEntry->Label.uUnused = 0;
2570 pEntry->Label.enmLabel = (uint8_t)enmType;
2571 pEntry->Label.uData = uData;
2572}
2573
2574
2575/**
2576 * Debug Info: Record info about a threaded call.
2577 */
2578static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2579{
2580 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2581 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2582 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2583 pEntry->ThreadedCall.uUnused = 0;
2584 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2585}
2586
2587
2588/**
2589 * Debug Info: Record info about a new guest instruction.
2590 */
2591static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2592{
2593 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2594 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2595 pEntry->GuestInstruction.uUnused = 0;
2596 pEntry->GuestInstruction.fExec = fExec;
2597}
2598
2599
2600/**
2601 * Debug Info: Record info about guest register shadowing.
2602 */
2603DECL_HIDDEN_THROW(void)
2604iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2605 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2606{
2607 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2608 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2609 pEntry->GuestRegShadowing.uUnused = 0;
2610 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2611 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2612 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2613#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2614 Assert( idxHstReg != UINT8_MAX
2615 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2616#endif
2617}
2618
2619
2620# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2621/**
2622 * Debug Info: Record info about guest SIMD register shadowing.
2623 */
2624DECL_HIDDEN_THROW(void)
2625iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2626 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2627{
2628 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2629 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2630 pEntry->GuestSimdRegShadowing.uUnused = 0;
2631 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2632 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2633 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2634}
2635# endif
2636
2637
2638# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2639/**
2640 * Debug Info: Record info about delayed RIP updates.
2641 */
2642DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
2643{
2644 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2645 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2646 pEntry->DelayedPcUpdate.offPc = offPc;
2647 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2648}
2649# endif
2650
2651# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2652
2653/**
2654 * Debug Info: Record info about a dirty guest register.
2655 */
2656DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2657 uint8_t idxGstReg, uint8_t idxHstReg)
2658{
2659 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2660 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2661 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2662 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2663 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2664}
2665
2666
2667/**
2668 * Debug Info: Record info about a dirty guest register writeback operation.
2669 */
2670DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2671{
2672 unsigned const cBitsGstRegMask = 25;
2673 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2674
2675 /* The first block of 25 bits: */
2676 if (fGstReg & fGstRegMask)
2677 {
2678 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2679 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2680 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2681 pEntry->GuestRegWriteback.cShift = 0;
2682 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2683 fGstReg &= ~(uint64_t)fGstRegMask;
2684 if (!fGstReg)
2685 return;
2686 }
2687
2688 /* The second block of 25 bits: */
2689 fGstReg >>= cBitsGstRegMask;
2690 if (fGstReg & fGstRegMask)
2691 {
2692 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2693 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2694 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2695 pEntry->GuestRegWriteback.cShift = 1;
2696 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2697 fGstReg &= ~(uint64_t)fGstRegMask;
2698 if (!fGstReg)
2699 return;
2700 }
2701
2702 /* The last block with 14 bits: */
2703 fGstReg >>= cBitsGstRegMask;
2704 Assert(fGstReg & fGstRegMask);
2705 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2706 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2707 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2708 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2709 pEntry->GuestRegWriteback.cShift = 2;
2710 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2711}
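
/*
 * Worked example for the encoding above: the 64-bit dirty mask is split into
 * chunks of 25, 25 and 14 bits tagged cShift = 0, 1 and 2, so a consumer can
 * rebuild each chunk's contribution as (uint64_t)fGstReg << (cShift * 25).
 * E.g. a mask with only bit 30 set is recorded as fGstReg = RT_BIT_32(5),
 * cShift = 1, since 30 = 1 * 25 + 5. The decoder side shown is an assumption.
 */
# if 0
    uint64_t fGstRegDecoded = 0;
    fGstRegDecoded |= (uint64_t)pEntry->GuestRegWriteback.fGstReg << (pEntry->GuestRegWriteback.cShift * 25);
# endif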
2712
2713# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2714
2715#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2716
2717
2718/*********************************************************************************************************************************
2719* Register Allocator *
2720*********************************************************************************************************************************/
2721
2722/**
2723 * Register parameter indexes (indexed by argument number).
2724 */
2725DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2726{
2727 IEMNATIVE_CALL_ARG0_GREG,
2728 IEMNATIVE_CALL_ARG1_GREG,
2729 IEMNATIVE_CALL_ARG2_GREG,
2730 IEMNATIVE_CALL_ARG3_GREG,
2731#if defined(IEMNATIVE_CALL_ARG4_GREG)
2732 IEMNATIVE_CALL_ARG4_GREG,
2733# if defined(IEMNATIVE_CALL_ARG5_GREG)
2734 IEMNATIVE_CALL_ARG5_GREG,
2735# if defined(IEMNATIVE_CALL_ARG6_GREG)
2736 IEMNATIVE_CALL_ARG6_GREG,
2737# if defined(IEMNATIVE_CALL_ARG7_GREG)
2738 IEMNATIVE_CALL_ARG7_GREG,
2739# endif
2740# endif
2741# endif
2742#endif
2743};
2744AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2745
2746/**
2747 * Call register masks indexed by argument count.
2748 */
2749DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2750{
2751 0,
2752 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2753 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2754 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2755 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2756 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2757#if defined(IEMNATIVE_CALL_ARG4_GREG)
2758 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2759 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2760# if defined(IEMNATIVE_CALL_ARG5_GREG)
2761 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2762 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2763# if defined(IEMNATIVE_CALL_ARG6_GREG)
2764 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2765 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2766 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2767# if defined(IEMNATIVE_CALL_ARG7_GREG)
2768 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2769 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2770 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2771# endif
2772# endif
2773# endif
2774#endif
2775};
2776
2777#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2778/**
2779 * BP offset of the stack argument slots.
2780 *
2781 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2782 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2783 */
2784DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2785{
2786 IEMNATIVE_FP_OFF_STACK_ARG0,
2787# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2788 IEMNATIVE_FP_OFF_STACK_ARG1,
2789# endif
2790# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2791 IEMNATIVE_FP_OFF_STACK_ARG2,
2792# endif
2793# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2794 IEMNATIVE_FP_OFF_STACK_ARG3,
2795# endif
2796};
2797AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2798#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
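
/*
 * Illustrative sketch only (not built): how the tables above are meant to be
 * consumed when materializing call argument number iArg - the first
 * IEMNATIVE_CALL_ARG_GREG_COUNT arguments go into host registers, any
 * remaining ones (on ABIs with stack arguments) into BP-relative frame slots.
 * The local variable names are placeholders.
 */
#if 0
    if (iArg < IEMNATIVE_CALL_ARG_GREG_COUNT)
        idxHstReg = g_aidxIemNativeCallRegs[iArg];          /* host register carrying the argument */
# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    else
        offBpDisp = g_aoffIemNativeCallStackArgBpDisp[iArg - IEMNATIVE_CALL_ARG_GREG_COUNT]; /* stack slot (BP relative) */
# endif
#endif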
2799
2800/**
2801 * Info about shadowed guest register values.
2802 * @see IEMNATIVEGSTREG
2803 */
2804DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2805{
2806#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2807 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2808 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2809 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2810 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2811 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2812 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2813 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2814 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2815 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2816 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2817 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2818 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2819 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2820 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2821 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2822 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2823 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2824 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2825 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2826 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2827 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2828 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2829 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2830 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2831 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2832 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2833 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2834 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2835 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2836 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2837 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2838 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2839 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2840 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2841 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2842 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2843 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2844 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2845 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2846 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2847 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2848 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2849 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2850 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2851 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2852 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2853 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2854 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2855#undef CPUMCTX_OFF_AND_SIZE
2856};
2857AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2858
2859
2860/** Host CPU general purpose register names. */
2861DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2862{
2863#ifdef RT_ARCH_AMD64
2864 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2865#elif defined(RT_ARCH_ARM64)
2866 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2867 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2868#else
2869# error "port me"
2870#endif
2871};
2872
2873
2874#if 0 /* unused */
2875/**
2876 * Tries to locate a suitable register in the given register mask.
2877 *
2878 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2879 * failed.
2880 *
2881 * @returns Host register number on success, returns UINT8_MAX on failure.
2882 */
2883static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2884{
2885 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2886 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2887 if (fRegs)
2888 {
2889 /** @todo pick better here: */
2890 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2891
2892 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2893 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2894 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2895 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2896
2897 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2898 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2899 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2900 return idxReg;
2901 }
2902 return UINT8_MAX;
2903}
2904#endif /* unused */
2905
2906
2907#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2908/**
2909 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2910 *
2911 * @returns New code buffer offset on success, UINT32_MAX on failure.
2912 * @param pReNative The native recompile state.
2913 * @param off The current code buffer position.
2914 * @param enmGstReg The guest register to store to.
2915 * @param idxHstReg The host register to store from.
2916 */
2917DECL_FORCE_INLINE_THROW(uint32_t)
2918iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
2919{
2920 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2921 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
2922
2923 switch (g_aGstShadowInfo[enmGstReg].cb)
2924 {
2925 case sizeof(uint64_t):
2926 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2927 case sizeof(uint32_t):
2928 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2929 case sizeof(uint16_t):
2930 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2931#if 0 /* not present in the table. */
2932 case sizeof(uint8_t):
2933 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2934#endif
2935 default:
2936 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
2937 }
2938}
2939
2940
2941/**
2942 * Emits code to flush a pending write of the given guest register if any.
2943 *
2944 * @returns New code buffer offset.
2945 * @param pReNative The native recompile state.
2946 * @param off Current code buffer position.
2947 * @param enmGstReg The guest register to flush.
2948 */
2949DECL_HIDDEN_THROW(uint32_t)
2950iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
2951{
2952 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2953
2954 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
2955 && enmGstReg <= kIemNativeGstReg_GprLast)
2956 || enmGstReg == kIemNativeGstReg_MxCsr);
2957 Assert( idxHstReg != UINT8_MAX
2958 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
2959 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
2960 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
2961
2962 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
2963
2964 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
2965 return off;
2966}
2967
2968
2969/**
2970 * Flush the given set of guest registers if marked as dirty.
2971 *
2972 * @returns New code buffer offset.
2973 * @param pReNative The native recompile state.
2974 * @param off Current code buffer position.
2975 * @param fFlushGstReg The guest register set to flush (default is flush everything).
2976 */
2977DECL_HIDDEN_THROW(uint32_t)
2978iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
2979{
2980 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
2981 if (bmGstRegShadowDirty)
2982 {
2983# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2984 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2985 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
2986# endif
2987 do
2988 {
2989 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2990 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2991 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2992 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2993 } while (bmGstRegShadowDirty);
2994 }
2995
2996 return off;
2997}
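
/*
 * Illustrative sketch only (not built): before handing control to code that
 * reads the guest context directly (e.g. a helper call or TB exit), a caller
 * can use the function above to write back just the shadows it cares about.
 * The mask below is merely an example.
 */
# if 0
    /* Flush a dirty RAX shadow only, leaving other dirty shadows pending: */
    off = iemNativeRegFlushDirtyGuest(pReNative, off, RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xAX));
# endif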
2998
2999
3000/**
3001 * Flush all shadowed guest registers marked as dirty for the given host register.
3002 *
3003 * @returns New code buffer offset.
3004 * @param pReNative The native recompile state.
3005 * @param off Current code buffer position.
3006 * @param idxHstReg The host register.
3007 *
3008 * @note This doesn't do any unshadowing of guest registers from the host register.
3009 */
3010DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3011{
3012 /* We need to flush any pending guest register writes this host register shadows. */
3013 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3014 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3015 {
3016# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3017 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3018 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3019# endif
3020 /** @todo r=bird: This is a crap way of enumerating a bitmask where we're
3021 * likely to only have a single bit set. It'll be in the 0..15 range,
3022 * but still it's 15 unnecessary loops for the last guest register. */
3023
3024 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3025 do
3026 {
3027 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3028 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3029 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3030 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3031 } while (bmGstRegShadowDirty);
3032 }
3033
3034 return off;
3035}
3036#endif
3037
3038
3039/**
3040 * Locate a register, possibly freeing one up.
3041 *
3042 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3043 * failed.
3044 *
3045 * @returns Host register number on success. Returns UINT8_MAX if no register
3046 *          was found; the caller is supposed to deal with this and raise an
3047 *          allocation type specific status code (if desired).
3048 *
3049 * @throws  VBox status code if we run into trouble spilling a variable or
3050 *          recording debug info.  Does NOT throw anything if we're out of
3051 *          registers, though.
3052 */
3053static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3054 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3055{
3056 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3057 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3058 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3059
3060 /*
3061 * Try a freed register that's shadowing a guest register.
3062 */
3063 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3064 if (fRegs)
3065 {
3066 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3067
3068#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3069 /*
3070 * When we have liveness information, we use it to kick out all shadowed
3071 * guest registers that will not be needed any more in this TB.  If we're
3072 * lucky, this may prevent us from ending up here again.
3073 *
3074 * Note! We must consider the previous entry here so we don't free
3075 * anything that the current threaded function requires (current
3076 * entry is produced by the next threaded function).
3077 */
3078 uint32_t const idxCurCall = pReNative->idxCurCall;
3079 if (idxCurCall > 0)
3080 {
3081 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3082
3083# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3084 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3085 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3086 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3087#else
3088 /* Construct a mask of the registers not in the read or write state.
3089 Note! We could skip writes, if they aren't from us, as this is just
3090 a hack to prevent trashing registers that have just been written
3091 or will be written when we retire the current instruction. */
3092 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3093 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3094 & IEMLIVENESSBIT_MASK;
3095#endif
3096 /* Merge EFLAGS. */
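            /* Note: going by the shift comments below, the liveness bitmap appears to track
               EFLAGS as seven entries starting at kIemNativeGstReg_EFlags (the "other" flags
               plus CF, PF, AF, ZF, SF and OF).  The three shift+AND steps fold these seven
               freeable bits down to a single bit at kIemNativeGstReg_EFlags, i.e. EFLAGS is
               only considered freeable when every one of its parts is. */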
3097 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3098 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3099 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3100 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3101 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3102
3103 /* If it matches any shadowed registers. */
3104 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3105 {
3106#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3107 /* Writeback any dirty shadow registers we are about to unshadow. */
3108 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3109#endif
3110
3111 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3112 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3113 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3114
3115 /* See if we've got any unshadowed registers we can return now. */
3116 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3117 if (fUnshadowedRegs)
3118 {
3119 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3120 return (fPreferVolatile
3121 ? ASMBitFirstSetU32(fUnshadowedRegs)
3122 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3123 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3124 - 1;
3125 }
3126 }
3127 }
3128#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3129
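        /* Pick which free-but-shadowing register to take: the lowest one when volatile
           registers are preferred, otherwise the highest one in the non-volatile subset
           (or the highest overall if no non-volatile candidate is left). */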
3130 unsigned const idxReg = (fPreferVolatile
3131 ? ASMBitFirstSetU32(fRegs)
3132 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3133 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3134 - 1;
3135
3136 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3137 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3138 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3139 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3140
3141#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3142 /* We need to flush any pending guest register writes this host register shadows. */
3143 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3144#endif
3145
3146 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3147 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3148 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3149 return idxReg;
3150 }
3151
3152 /*
3153 * Try free up a variable that's in a register.
3154 *
3155 * We do two rounds here, first evacuating variables that don't need to be
3156 * saved on the stack, then in the second round moving things to the stack.
3157 */
3158 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3159 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3160 {
3161 uint32_t fVars = pReNative->Core.bmVars;
3162 while (fVars)
3163 {
3164 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3165 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3166#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3167 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
3168 {   fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit first, or the loop would never advance past this variable. */
3169#endif
3170
3171 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3172 && (RT_BIT_32(idxReg) & fRegMask)
3173 && ( iLoop == 0
3174 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3175 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3176 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3177 {
3178 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3179 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3180 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3181 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3182 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3183 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3184#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3185 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3186#endif
3187
3188 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3189 {
3190 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3191 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3192 }
3193
3194 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3195 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3196
3197 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3198 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3199 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3200 return idxReg;
3201 }
3202 fVars &= ~RT_BIT_32(idxVar);
3203 }
3204 }
3205
3206 return UINT8_MAX;
3207}
3208
3209
3210/**
3211 * Reassigns a variable to a different register specified by the caller.
3212 *
3213 * @returns The new code buffer position.
3214 * @param pReNative The native recompile state.
3215 * @param off The current code buffer position.
3216 * @param idxVar The variable index.
3217 * @param idxRegOld The old host register number.
3218 * @param idxRegNew The new host register number.
3219 * @param pszCaller The caller for logging.
3220 */
3221static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3222 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3223{
3224 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3225 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3226#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3227 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3228#endif
3229 RT_NOREF(pszCaller);
3230
3231#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3232 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3233#endif
3234 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3235
3236 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3237#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3238 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3239#endif
3240 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3241 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3242 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3243
3244 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3245 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3246 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3247 if (fGstRegShadows)
3248 {
3249 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3250 | RT_BIT_32(idxRegNew);
3251 while (fGstRegShadows)
3252 {
3253 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3254 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3255
3256 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3257 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3258 }
3259 }
3260
3261 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3262 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3263 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3264 return off;
3265}
3266
3267
3268/**
3269 * Moves a variable to a different register or spills it onto the stack.
3270 *
3271 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3272 * kinds can easily be recreated if needed later.
3273 *
3274 * @returns The new code buffer position.
3275 * @param pReNative The native recompile state.
3276 * @param off The current code buffer position.
3277 * @param idxVar The variable index.
3278 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3279 * call-volatile registers.
3280 */
3281DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3282 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3283{
3284 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3285 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3286 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3287 Assert(!pVar->fRegAcquired);
3288
3289 uint8_t const idxRegOld = pVar->idxReg;
3290 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3291 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3292 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3293 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3294 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3295 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3296 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3297 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3298#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3299 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3300#endif
3301
3302
3303 /** @todo Add statistics on this.*/
3304 /** @todo Implement basic variable liveness analysis (python) so variables
3305 * can be freed immediately once no longer used.  Not doing so risks trashing
3306 * registers and stack for dead variables.
3307 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3308
3309 /*
3310 * First try move it to a different register, as that's cheaper.
3311 */
3312 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3313 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3314 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3315 if (fRegs)
3316 {
3317 /* Avoid using shadow registers, if possible. */
3318 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3319 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3320 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3321 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3322 }
3323
3324 /*
3325 * Otherwise we must spill the register onto the stack.
3326 */
3327 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3328 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3329 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3330 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3331
3332 pVar->idxReg = UINT8_MAX;
3333 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3334 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3335 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3336 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3337 return off;
3338}
3339
3340
3341/**
3342 * Allocates a temporary host general purpose register.
3343 *
3344 * This may emit code to save register content onto the stack in order to free
3345 * up a register.
3346 *
3347 * @returns The host register number; throws VBox status code on failure,
3348 * so no need to check the return value.
3349 * @param pReNative The native recompile state.
3350 * @param poff Pointer to the variable with the code buffer position.
3351 * This will be updated if we need to move a variable from
3352 * register to stack in order to satisfy the request.
3353 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3354 * registers (@c true, default) or the other way around
3355 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3356 */
3357DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3358{
3359 /*
3360 * Try find a completely unused register, preferably a call-volatile one.
3361 */
3362 uint8_t idxReg;
3363 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3364 & ~pReNative->Core.bmHstRegsWithGstShadow
3365 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3366 if (fRegs)
3367 {
3368 if (fPreferVolatile)
3369 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3370 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3371 else
3372 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3373 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3374 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3375 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3376 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3377 }
3378 else
3379 {
3380 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3381 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3382 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3383 }
3384 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3385}
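
/*
 * Usage sketch (illustrative only, hence the #if 0): the typical allocate / emit /
 * free pattern for a temporary GPR.  'pReNative' and 'off' are assumed to be the
 * usual recompiler state and code buffer offset, and the immediate is arbitrary.
 */
#if 0
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off, true /*fPreferVolatile*/);
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0xfeedface));
    /* ... emit code using idxTmpReg ... */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
#endif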
3386
3387
3388/**
3389 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3390 * registers.
3391 *
3392 * @returns The host register number; throws VBox status code on failure,
3393 * so no need to check the return value.
3394 * @param pReNative The native recompile state.
3395 * @param poff Pointer to the variable with the code buffer position.
3396 * This will be updated if we need to move a variable from
3397 * register to stack in order to satisfy the request.
3398 * @param fRegMask Mask of acceptable registers.
3399 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3400 * registers (@c true, default) or the other way around
3401 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3402 */
3403DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3404 bool fPreferVolatile /*= true*/)
3405{
3406 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3407 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3408
3409 /*
3410 * Try find a completely unused register, preferably a call-volatile one.
3411 */
3412 uint8_t idxReg;
3413 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3414 & ~pReNative->Core.bmHstRegsWithGstShadow
3415 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3416 & fRegMask;
3417 if (fRegs)
3418 {
3419 if (fPreferVolatile)
3420 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3421 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3422 else
3423 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3424 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3425 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3426 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3427 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3428 }
3429 else
3430 {
3431 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3432 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3433 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3434 }
3435 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3436}
3437
3438
3439/**
3440 * Allocates a temporary register for loading an immediate value into.
3441 *
3442 * This will emit code to load the immediate, unless there happens to be an
3443 * unused register with the value already loaded.
3444 *
3445 * The caller must not modify the returned register; it is to be considered
3446 * read-only.  Free it using iemNativeRegFreeTmpImm().
3447 *
3448 * @returns The host register number; throws VBox status code on failure, so no
3449 * need to check the return value.
3450 * @param pReNative The native recompile state.
3451 * @param poff Pointer to the variable with the code buffer position.
3452 * @param uImm The immediate value that the register must hold upon
3453 * return.
3454 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3455 * registers (@c true, default) or the other way around
3456 * (@c false).
3457 *
3458 * @note Reusing immediate values has not been implemented yet.
3459 */
3460DECL_HIDDEN_THROW(uint8_t)
3461iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3462{
3463 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3464 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3465 return idxReg;
3466}
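
/*
 * Usage sketch (illustrative only): loading a constant into a read-only temporary
 * and releasing it again.  The value is a placeholder; 'pReNative' and 'off' are
 * the usual recompiler state and code buffer offset.
 */
#if 0
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
    /* ... emit code that only reads idxRegImm ... */
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);
#endif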
3467
3468
3469/**
3470 * Allocates a temporary host general purpose register for keeping a guest
3471 * register value.
3472 *
3473 * Since we may already have a register holding the guest register value,
3474 * code will be emitted to do the loading if that's not the case. Code may also
3475 * be emitted if we have to free up a register to satisfy the request.
3476 *
3477 * @returns The host register number; throws VBox status code on failure, so no
3478 * need to check the return value.
3479 * @param pReNative The native recompile state.
3480 * @param poff Pointer to the variable with the code buffer
3481 * position. This will be updated if we need to move a
3482 * variable from register to stack in order to satisfy
3483 * the request.
3484 * @param enmGstReg The guest register that is to be updated.
3485 * @param enmIntendedUse How the caller will be using the host register.
3486 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3487 * register is okay (default). The ASSUMPTION here is
3488 * that the caller has already flushed all volatile
3489 * registers, so this is only applied if we allocate a
3490 * new register.
3491 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3492 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3493 */
3494DECL_HIDDEN_THROW(uint8_t)
3495iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3496 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3497 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3498{
3499 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3500#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3501 AssertMsg( fSkipLivenessAssert
3502 || pReNative->idxCurCall == 0
3503 || enmGstReg == kIemNativeGstReg_Pc
3504 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3505 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3506 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3507 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3508 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3509 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3510#endif
3511 RT_NOREF(fSkipLivenessAssert);
3512#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3513 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3514#endif
3515 uint32_t const fRegMask = !fNoVolatileRegs
3516 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3517 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3518
3519 /*
3520 * First check if the guest register value is already in a host register.
3521 */
3522 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3523 {
3524 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3525 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3526 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3527 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3528
3529 /* It's not supposed to be allocated... */
3530 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3531 {
3532 /*
3533 * If the register will trash the guest shadow copy, try find a
3534 * completely unused register we can use instead. If that fails,
3535 * we need to disassociate the host reg from the guest reg.
3536 */
3537 /** @todo would be nice to know if preserving the register is in any way helpful. */
3538 /* If the purpose is calculations, try duplicate the register value as
3539 we'll be clobbering the shadow. */
3540 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3541 && ( ~pReNative->Core.bmHstRegs
3542 & ~pReNative->Core.bmHstRegsWithGstShadow
3543 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3544 {
3545 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3546
3547 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3548
3549 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3550 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3551 g_apszIemNativeHstRegNames[idxRegNew]));
3552 idxReg = idxRegNew;
3553 }
3554 /* If the current register matches the restrictions, go ahead and allocate
3555 it for the caller. */
3556 else if (fRegMask & RT_BIT_32(idxReg))
3557 {
3558 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3559 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3560 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3561 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3562 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3563 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3564 else
3565 {
3566 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3567 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3568 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3569 }
3570 }
3571 /* Otherwise, allocate a register that satisfies the caller and transfer
3572 the shadowing if compatible with the intended use. (This basically
3573 means the call wants a non-volatile register (RSP push/pop scenario).) */
3574 else
3575 {
3576 Assert(fNoVolatileRegs);
3577 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3578 !fNoVolatileRegs
3579 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3580 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3581 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3582 {
3583 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3584 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3585 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3586 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3587 }
3588 else
3589 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3590 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3591 g_apszIemNativeHstRegNames[idxRegNew]));
3592 idxReg = idxRegNew;
3593 }
3594 }
3595 else
3596 {
3597 /*
3598 * Oops. Shadowed guest register already allocated!
3599 *
3600 * Allocate a new register, copy the value and, if updating, the
3601 * guest shadow copy assignment to the new register.
3602 */
3603 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3604 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3605 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3606 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3607
3608 /** @todo share register for readonly access. */
3609 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3610 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3611
3612 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3613 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3614
3615 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3616 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3617 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3618 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3619 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3620 else
3621 {
3622 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3623 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3624 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3625 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3626 }
3627 idxReg = idxRegNew;
3628 }
3629 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3630
3631#ifdef VBOX_STRICT
3632 /* Strict builds: Check that the value is correct. */
3633 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3634#endif
3635
3636#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3637 /** @todo r=aeichner Implement for registers other than GPR as well. */
3638 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3639 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3640 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3641 && enmGstReg <= kIemNativeGstReg_GprLast)
3642 || enmGstReg == kIemNativeGstReg_MxCsr))
3643 {
3644# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3645 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3646 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3647# endif
3648 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3649 }
3650#endif
3651
3652 return idxReg;
3653 }
3654
3655 /*
3656 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3657 */
3658 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3659
3660 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3661 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3662
3663 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3664 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3665 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3666 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3667
3668#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3669 /** @todo r=aeichner Implement for registers other than GPR as well. */
3670 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3671 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3672 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3673 && enmGstReg <= kIemNativeGstReg_GprLast)
3674 || enmGstReg == kIemNativeGstReg_MxCsr))
3675 {
3676# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3677 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3678 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3679# endif
3680 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3681 }
3682#endif
3683
3684 return idxRegNew;
3685}
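
/*
 * Usage sketch (illustrative only): fetching the guest PC into a host register for
 * read-only use.  With kIemNativeGstRegUse_ReadOnly the shadow copy is preserved,
 * so the register is simply freed again when done; 'pReNative' and 'off' are the
 * usual recompiler state and code buffer offset.
 */
#if 0
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ReadOnly);
    /* ... emit code reading idxPcReg ... */
    iemNativeRegFreeTmp(pReNative, idxPcReg);
#endif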
3686
3687
3688/**
3689 * Allocates a temporary host general purpose register that already holds the
3690 * given guest register value.
3691 *
3692 * The use case for this function is places where the shadowing state cannot be
3693 * modified due to branching and such. This will fail if we don't have a
3694 * current shadow copy handy or if it's incompatible. The only code that will
3695 * be emitted here is value checking code in strict builds.
3696 *
3697 * The intended use can only be readonly!
3698 *
3699 * @returns The host register number, UINT8_MAX if not present.
3700 * @param pReNative The native recompile state.
3701 * @param poff Pointer to the instruction buffer offset.
3702 * Will be updated in strict builds if a register is
3703 * found.
3704 * @param enmGstReg The guest register that is to be fetched.
3705 * @note In strict builds, this may throw instruction buffer growth failures.
3706 * Non-strict builds will not throw anything.
3707 * @sa iemNativeRegAllocTmpForGuestReg
3708 */
3709DECL_HIDDEN_THROW(uint8_t)
3710iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3711{
3712 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3713#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3714 AssertMsg( pReNative->idxCurCall == 0
3715 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3716 || enmGstReg == kIemNativeGstReg_Pc,
3717 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3718#endif
3719
3720 /*
3721 * First check if the guest register value is already in a host register.
3722 */
3723 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3724 {
3725 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3726 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3727 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3728 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3729
3730 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3731 {
3732 /*
3733 * We only do readonly use here, so easy compared to the other
3734 * variant of this code.
3735 */
3736 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3737 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3738 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3739 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3740 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3741
3742#ifdef VBOX_STRICT
3743 /* Strict builds: Check that the value is correct. */
3744 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3745#else
3746 RT_NOREF(poff);
3747#endif
3748 return idxReg;
3749 }
3750 }
3751
3752 return UINT8_MAX;
3753}
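
/*
 * Usage sketch (illustrative only): opportunistically reusing an existing shadow
 * copy and doing nothing extra when there is none.  The fallback branch is just a
 * placeholder; 'pReNative' and 'off' are the usual recompiler state and offset.
 */
#if 0
    uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_EFlags);
    if (idxEflReg != UINT8_MAX)
    {
        /* ... emit code that reads idxEflReg ... */
        iemNativeRegFreeTmp(pReNative, idxEflReg);
    }
    else
    {
        /* ... fall back to loading EFLAGS from CPUMCTX as needed ... */
    }
#endif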
3754
3755
3756/**
3757 * Allocates argument registers for a function call.
3758 *
3759 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3760 * need to check the return value.
3761 * @param pReNative The native recompile state.
3762 * @param off The current code buffer offset.
3763 * @param cArgs The number of arguments the function call takes.
3764 */
3765DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3766{
3767 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3768 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3769 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3770 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3771
3772 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3773 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3774 else if (cArgs == 0)
3775 return off;
3776
3777 /*
3778 * Do we get lucky and all registers are free and not shadowing anything?
3779 */
3780 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3781 for (uint32_t i = 0; i < cArgs; i++)
3782 {
3783 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3784 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3785 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3786 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3787 }
3788 /*
3789 * Okay, not lucky so we have to free up the registers.
3790 */
3791 else
3792 for (uint32_t i = 0; i < cArgs; i++)
3793 {
3794 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3795 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3796 {
3797 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3798 {
3799 case kIemNativeWhat_Var:
3800 {
3801 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3802 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3803 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
3804 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3805 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
3806#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3807 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3808#endif
3809
3810 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
3811 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3812 else
3813 {
3814 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3815 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3816 }
3817 break;
3818 }
3819
3820 case kIemNativeWhat_Tmp:
3821 case kIemNativeWhat_Arg:
3822 case kIemNativeWhat_rc:
3823 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3824 default:
3825 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3826 }
3827
3828 }
3829 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3830 {
3831 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3832 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3833 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3834#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3835 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3836#endif
3837 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3838 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3839 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3840 }
3841 else
3842 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3843 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3844 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3845 }
3846 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3847 return off;
3848}
3849
3850
3851DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3852
3853
3854#if 0
3855/**
3856 * Frees a register assignment of any type.
3857 *
3858 * @param pReNative The native recompile state.
3859 * @param idxHstReg The register to free.
3860 *
3861 * @note Does not update variables.
3862 */
3863DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3864{
3865 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3866 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3867 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3868 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3869 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3870 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3871 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3872 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3873 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3874 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3875 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3876 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3877 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3878 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3879
3880 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3881 /* no flushing, right:
3882 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3883 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3884 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3885 */
3886}
3887#endif
3888
3889
3890/**
3891 * Frees a temporary register.
3892 *
3893 * Any shadow copies of guest registers assigned to the host register will not
3894 * be flushed by this operation.
3895 */
3896DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3897{
3898 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3899 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3900 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3901 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3902 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3903}
3904
3905
3906/**
3907 * Frees a temporary immediate register.
3908 *
3909 * It is assumed that the caller has not modified the register, so it still holds
3910 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3911 */
3912DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3913{
3914 iemNativeRegFreeTmp(pReNative, idxHstReg);
3915}
3916
3917
3918/**
3919 * Frees a register assigned to a variable.
3920 *
3921 * The register will be disassociated from the variable.
3922 */
3923DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3924{
3925 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3926 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3927 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3928 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3929 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3930#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3931 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3932#endif
3933
3934 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3935 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3936 if (!fFlushShadows)
3937 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3938 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
3939 else
3940 {
3941 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3942 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3943#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3944 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
3945#endif
3946 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3947 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
3948 uint64_t fGstRegShadows = fGstRegShadowsOld;
3949 while (fGstRegShadows)
3950 {
3951 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3952 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3953
3954 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
3955 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
3956 }
3957 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3958 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3959 }
3960}
3961
3962
3963#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3964# if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
3965/** Host CPU SIMD register names. */
3966DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
3967{
3968# ifdef RT_ARCH_AMD64
3969 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
3970# elif RT_ARCH_ARM64
3971 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
3972 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
3973# else
3974# error "port me"
3975# endif
3976};
3977# endif
3978
3979
3980/**
3981 * Frees a SIMD register assigned to a variable.
3982 *
3983 * The register will be disassociated from the variable.
3984 */
3985DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3986{
3987 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
3988 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3989 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
3990 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3991 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3992 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3993
3994 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3995 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
3996 if (!fFlushShadows)
3997 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3998 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
3999 else
4000 {
4001 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4002 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4003 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4004 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4005 uint64_t fGstRegShadows = fGstRegShadowsOld;
4006 while (fGstRegShadows)
4007 {
4008 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4009 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4010
4011 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4012 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4013 }
4014 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4015 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4016 }
4017}
4018
4019
4020/**
4021 * Reassigns a variable to a different SIMD register specified by the caller.
4022 *
4023 * @returns The new code buffer position.
4024 * @param pReNative The native recompile state.
4025 * @param off The current code buffer position.
4026 * @param idxVar The variable index.
4027 * @param idxRegOld The old host register number.
4028 * @param idxRegNew The new host register number.
4029 * @param pszCaller The caller for logging.
4030 */
4031static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4032 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4033{
4034 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4035 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4036 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4037 RT_NOREF(pszCaller);
4038
4039 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4040 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4041 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4042
4043 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4044 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4045 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4046
4047 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4048 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4050
4051 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4052 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4053 else
4054 {
4055 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4056 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4057 }
4058
4059 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4060 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4061 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
4062 if (fGstRegShadows)
4063 {
4064 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4065 | RT_BIT_32(idxRegNew);
4066 while (fGstRegShadows)
4067 {
4068 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4069 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4070
4071 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4072 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4073 }
4074 }
4075
4076 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4077 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4078 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4079 return off;
4080}
4081
4082
4083/**
4084 * Moves a variable to a different register or spills it onto the stack.
4085 *
4086 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4087 * kinds can easily be recreated if needed later.
4088 *
4089 * @returns The new code buffer position.
4090 * @param pReNative The native recompile state.
4091 * @param off The current code buffer position.
4092 * @param idxVar The variable index.
4093 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4094 * call-volatile registers.
4095 */
4096DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4097 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4098{
4099 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4100 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4101 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4102 Assert(!pVar->fRegAcquired);
4103 Assert(!pVar->fSimdReg);
4104
4105 uint8_t const idxRegOld = pVar->idxReg;
4106 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4107 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4108 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4109 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4110 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4111 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4112 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4113 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4114 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4115 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4116
4117 /** @todo Add statistics on this.*/
4118 /** @todo Implement basic variable liveness analysis (python) so variables
4119 * can be freed immediately once no longer used.  Not doing so risks trashing
4120 * registers and stack for dead variables.
4121 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4122
4123 /*
4124 * First try move it to a different register, as that's cheaper.
4125 */
4126 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4127 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4128 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4129 if (fRegs)
4130 {
4131 /* Avoid using shadow registers, if possible. */
4132 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4133 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4134 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4135 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4136 }
4137
4138 /*
4139 * Otherwise we must spill the register onto the stack.
4140 */
4141 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4142 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4143 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4144
4145 if (pVar->cbVar == sizeof(RTUINT128U))
4146 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4147 else
4148 {
4149 Assert(pVar->cbVar == sizeof(RTUINT256U));
4150 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4151 }
4152
4153 pVar->idxReg = UINT8_MAX;
4154 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4155 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4156 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4157 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4158 return off;
4159}
4160
4161
4162/**
4163 * Called right before emitting a call instruction to move anything important
4164 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
4165 * optionally freeing argument variables.
4166 *
4167 * @returns New code buffer offset, UINT32_MAX on failure.
4168 * @param pReNative The native recompile state.
4169 * @param off The code buffer offset.
4170 * @param cArgs The number of arguments the function call takes.
4171 * It is presumed that the host register part of these has
4172 * been allocated as such already and won't need moving,
4173 * just freeing.
4174 * @param fKeepVars Mask of variables that should keep their register
4175 * assignments. Caller must take care to handle these.
4176 */
4177DECL_HIDDEN_THROW(uint32_t)
4178iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4179{
4180 Assert(!cArgs); RT_NOREF(cArgs);
4181
4182 /* fKeepVars will reduce this mask. */
4183 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4184
4185 /*
4186 * Move anything important out of volatile registers.
4187 */
4188 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4189#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4190 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4191#endif
4192 ;
4193
4194 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4195 if (!fSimdRegsToMove)
4196 { /* likely */ }
4197 else
4198 {
4199 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4200 while (fSimdRegsToMove != 0)
4201 {
4202 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4203 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4204
4205 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4206 {
4207 case kIemNativeWhat_Var:
4208 {
4209 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4210 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4211 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4212 Assert(pVar->idxReg == idxSimdReg);
4213 Assert(pVar->fSimdReg);
4214 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4215 {
4216 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4217 idxVar, pVar->enmKind, pVar->idxReg));
4218 if (pVar->enmKind != kIemNativeVarKind_Stack)
4219 pVar->idxReg = UINT8_MAX;
4220 else
4221 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4222 }
4223 else
4224 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4225 continue;
4226 }
4227
4228 case kIemNativeWhat_Arg:
4229 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4230 continue;
4231
4232 case kIemNativeWhat_rc:
4233 case kIemNativeWhat_Tmp:
4234 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4235 continue;
4236
4237 case kIemNativeWhat_FixedReserved:
4238#ifdef RT_ARCH_ARM64
4239 continue; /* On ARM the upper half of the virtual 256-bit register. */
4240#endif
4241
4242 case kIemNativeWhat_FixedTmp:
4243 case kIemNativeWhat_pVCpuFixed:
4244 case kIemNativeWhat_pCtxFixed:
4245 case kIemNativeWhat_PcShadow:
4246 case kIemNativeWhat_Invalid:
4247 case kIemNativeWhat_End:
4248 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4249 }
4250 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4251 }
4252 }
4253
4254 /*
4255 * Do the actual freeing.
4256 */
4257 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4258 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4259 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4260 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4261
4262 /* If there are guest register shadows in any call-volatile register, we
4263 have to clear the corresponding guest register masks for each register. */
4264 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4265 if (fHstSimdRegsWithGstShadow)
4266 {
4267 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4268 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4269 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4270 do
4271 {
4272 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4273 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4274
4275 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4276
4277#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4278 /*
4279 * Flush any pending writes now (this might have been skipped earlier in iemEmitCallCommon(), but such skipping
4280 * doesn't apply to call-volatile registers).
4281 */
4282 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4283 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4284 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4285#endif
4286 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4287 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4288
4289 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4290 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4291 } while (fHstSimdRegsWithGstShadow != 0);
4292 }
4293
4294 return off;
4295}
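/* Illustrative note (editorial): this SIMD variant is invoked from
   iemNativeRegMoveAndFreeAndFlushAtCall() further down with zero arguments,

       off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);

   which is why the function starts with Assert(!cArgs). */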
4296#endif
4297
4298
4299/**
4300 * Called right before emitting a call instruction to move anything important
4301 * out of call-volatile registers, free and flush the call-volatile registers,
4302 * optionally freeing argument variables.
4303 *
4304 * @returns New code buffer offset, UINT32_MAX on failure.
4305 * @param pReNative The native recompile state.
4306 * @param off The code buffer offset.
4307 * @param cArgs The number of arguments the function call takes.
4308 * It is presumed that the host register part of these have
4309 * been allocated as such already and won't need moving,
4310 * just freeing.
4311 * @param fKeepVars Mask of variables that should keep their register
4312 * assignments. Caller must take care to handle these.
4313 */
4314DECL_HIDDEN_THROW(uint32_t)
4315iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4316{
4317 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4318
4319 /* fKeepVars will reduce this mask. */
4320 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4321
4322 /*
4323 * Move anything important out of volatile registers.
4324 */
4325 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4326 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4327 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4328#ifdef IEMNATIVE_REG_FIXED_TMP0
4329 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4330#endif
4331#ifdef IEMNATIVE_REG_FIXED_TMP1
4332 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4333#endif
4334#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4335 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4336#endif
4337 & ~g_afIemNativeCallRegs[cArgs];
4338
4339 fRegsToMove &= pReNative->Core.bmHstRegs;
4340 if (!fRegsToMove)
4341 { /* likely */ }
4342 else
4343 {
4344 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4345 while (fRegsToMove != 0)
4346 {
4347 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4348 fRegsToMove &= ~RT_BIT_32(idxReg);
4349
4350 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4351 {
4352 case kIemNativeWhat_Var:
4353 {
4354 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4355 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4356 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4357 Assert(pVar->idxReg == idxReg);
4358#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4359 Assert(!pVar->fSimdReg);
4360#endif
4361 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4362 {
4363 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4364 idxVar, pVar->enmKind, pVar->idxReg));
4365 if (pVar->enmKind != kIemNativeVarKind_Stack)
4366 pVar->idxReg = UINT8_MAX;
4367 else
4368 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4369 }
4370 else
4371 fRegsToFree &= ~RT_BIT_32(idxReg);
4372 continue;
4373 }
4374
4375 case kIemNativeWhat_Arg:
4376 AssertMsgFailed(("What?!?: %u\n", idxReg));
4377 continue;
4378
4379 case kIemNativeWhat_rc:
4380 case kIemNativeWhat_Tmp:
4381 AssertMsgFailed(("Missing free: %u\n", idxReg));
4382 continue;
4383
4384 case kIemNativeWhat_FixedTmp:
4385 case kIemNativeWhat_pVCpuFixed:
4386 case kIemNativeWhat_pCtxFixed:
4387 case kIemNativeWhat_PcShadow:
4388 case kIemNativeWhat_FixedReserved:
4389 case kIemNativeWhat_Invalid:
4390 case kIemNativeWhat_End:
4391 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4392 }
4393 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4394 }
4395 }
4396
4397 /*
4398 * Do the actual freeing.
4399 */
4400 if (pReNative->Core.bmHstRegs & fRegsToFree)
4401 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4402 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4403 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4404
4405 /* If there are guest register shadows in any call-volatile register, we
4406 have to clear the corresponding guest register masks for each register. */
4407 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4408 if (fHstRegsWithGstShadow)
4409 {
4410 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4411 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4412 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4413 do
4414 {
4415 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4416 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4417
4418 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4419
4420#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4421 /*
4422 * Flush any pending writes now (this might have been skipped earlier in iemEmitCallCommon(), but such skipping
4423 * doesn't apply to call-volatile registers).
4424 */
4425 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4426 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4427 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4428#endif
4429
4430 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4431 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4432 } while (fHstRegsWithGstShadow != 0);
4433 }
4434
4435#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4436 /* Now for the SIMD registers, no argument support for now. */
4437 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4438#endif
4439
4440 return off;
4441}
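/* Usage sketch (illustrative only; the exact sequence lives in the call
   emitter, e.g. iemEmitCallCommon(), and the argument loading shown here is
   just an assumption for the example):

       off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/);
       // ...load IEMNATIVE_CALL_ARG0_GREG/IEMNATIVE_CALL_ARG1_GREG and emit the call...

   Per the doc comment above, the host registers backing the first cArgs
   arguments are presumed to be allocated as arguments already and are only
   freed here, not moved. */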
4442
4443
4444/**
4445 * Flushes a set of guest register shadow copies.
4446 *
4447 * This is usually done after calling a threaded function or a C-implementation
4448 * of an instruction.
4449 *
4450 * @param pReNative The native recompile state.
4451 * @param fGstRegs Set of guest registers to flush.
4452 */
4453DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4454{
4455 /*
4456 * Reduce the mask by what's currently shadowed
4457 */
4458 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4459 fGstRegs &= bmGstRegShadowsOld;
4460 if (fGstRegs)
4461 {
4462 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4463 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4464 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4465 if (bmGstRegShadowsNew)
4466 {
4467 /*
4468 * Partial.
4469 */
4470 do
4471 {
4472 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4473 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4474 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4475 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4476 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4477#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4478 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4479#endif
4480
4481 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4482 fGstRegs &= ~fInThisHstReg;
4483 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4484 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4485 if (!fGstRegShadowsNew)
4486 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4487 } while (fGstRegs != 0);
4488 }
4489 else
4490 {
4491 /*
4492 * Clear all.
4493 */
4494 do
4495 {
4496 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4497 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4498 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4499 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4500 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4501#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4502 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4503#endif
4504
4505 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4506 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4507 } while (fGstRegs != 0);
4508 pReNative->Core.bmHstRegsWithGstShadow = 0;
4509 }
4510 }
4511}
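/* Illustrative example (editorial; kIemNativeGstReg_GprFirst and X86_GREG_xAX
   are assumed from the wider code base): dropping the stale RAX shadow after a
   helper call could look like

       iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xAX));

   while passing UINT64_MAX simply flushes every currently shadowed guest register. */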
4512
4513
4514/**
4515 * Flushes guest register shadow copies held by a set of host registers.
4516 *
4517 * This is used with the TLB lookup code for ensuring that we don't carry on
4518 * with any guest shadows in volatile registers, as these will get corrupted by
4519 * a TLB miss.
4520 *
4521 * @param pReNative The native recompile state.
4522 * @param fHstRegs Set of host registers to flush guest shadows for.
4523 */
4524DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4525{
4526 /*
4527 * Reduce the mask by what's currently shadowed.
4528 */
4529 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4530 fHstRegs &= bmHstRegsWithGstShadowOld;
4531 if (fHstRegs)
4532 {
4533 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4534 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4535 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4536 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4537 if (bmHstRegsWithGstShadowNew)
4538 {
4539 /*
4540 * Partial (likely).
4541 */
4542 uint64_t fGstShadows = 0;
4543 do
4544 {
4545 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4546 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4547 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4548 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4549#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4550 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4551#endif
4552
4553 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4554 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4555 fHstRegs &= ~RT_BIT_32(idxHstReg);
4556 } while (fHstRegs != 0);
4557 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4558 }
4559 else
4560 {
4561 /*
4562 * Clear all.
4563 */
4564 do
4565 {
4566 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4567 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4568 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4569 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4570#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4571 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4572#endif
4573
4574 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4575 fHstRegs &= ~RT_BIT_32(idxHstReg);
4576 } while (fHstRegs != 0);
4577 pReNative->Core.bmGstRegShadows = 0;
4578 }
4579 }
4580}
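/* Illustrative call (editorial sketch): the TLB lookup code mentioned above
   would typically pass the call-volatile set, e.g.

       iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);

   so no guest shadow is left in a register the TLB-miss helper may clobber. */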
4581
4582
4583/**
4584 * Restores guest shadow copies in volatile registers.
4585 *
4586 * This is used after calling a helper function (think TLB miss) to restore the
4587 * register state of volatile registers.
4588 *
4589 * @param pReNative The native recompile state.
4590 * @param off The code buffer offset.
4591 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4592 * be active (allocated) w/o asserting. Hack.
4593 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4594 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4595 */
4596DECL_HIDDEN_THROW(uint32_t)
4597iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4598{
4599 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4600 if (fHstRegs)
4601 {
4602 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4603 do
4604 {
4605 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4606
4607 /* It's not fatal if a register is active holding a variable that is
4608 shadowing a guest register, ASSUMING all pending guest register
4609 writes were flushed prior to the helper call. However, we'll be
4610 emitting duplicate restores, so it wastes code space. */
4611 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4612 RT_NOREF(fHstRegsActiveShadows);
4613
4614 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4615#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4616 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4617#endif
4618 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4619 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4620 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4621
4622 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4623 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4624
4625 fHstRegs &= ~RT_BIT_32(idxHstReg);
4626 } while (fHstRegs != 0);
4627 }
4628 return off;
4629}
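/* Pairing sketch (illustrative; the exact parameter lists of the two @see'd
   helpers are assumptions, only their names come from the comment above):

       off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
       // ...emit the helper call, e.g. the TLB-miss path...
       off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
       off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
*/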
4630
4631
4632
4633
4634/*********************************************************************************************************************************
4635* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4636*********************************************************************************************************************************/
4637#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4638
4639/**
4640 * Info about shadowed guest SIMD register values.
4641 * @see IEMNATIVEGSTSIMDREG
4642 */
4643static struct
4644{
4645 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4646 uint32_t offXmm;
4647 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4648 uint32_t offYmm;
4649 /** Name (for logging). */
4650 const char *pszName;
4651} const g_aGstSimdShadowInfo[] =
4652{
4653#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4654 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4655 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4656 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4657 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4658 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4659 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4660 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4661 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4662 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4663 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4664 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4665 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4666 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4667 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4668 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4669 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4670 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4671#undef CPUMCTX_OFF_AND_SIZE
4672};
4673AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
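/* Editorial note: offXmm/offYmm are the VMCPU offsets the store emitters below
   use when writing a dirty shadow back to the guest context, e.g. (taken from
   iemNativeSimdRegFlushPendingWrite() further down):

       off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg,
                                                       g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);

   i.e. offXmm addresses the low 128 bits (XMM) and offYmm the high 128 bits (YmmHi). */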
4674
4675
4676/**
4677 * Frees a temporary SIMD register.
4678 *
4679 * Any shadow copies of guest registers assigned to the host register will not
4680 * be flushed by this operation.
4681 */
4682DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4683{
4684 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4685 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4686 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4687 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4688 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4689}
4690
4691
4692/**
4693 * Emits code to flush a pending write of the given guest SIMD register, if any, and clears its dirty state afterwards.
4694 *
4695 * @returns New code buffer offset.
4696 * @param pReNative The native recompile state.
4697 * @param off Current code buffer position.
4698 * @param enmGstSimdReg The guest SIMD register to flush.
4699 */
4700DECL_HIDDEN_THROW(uint32_t)
4701iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4702{
4703 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4704
4705 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4706 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4707 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4708 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4709
4710 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4711 {
4712 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4713 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4714 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4715 }
4716
4717 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4718 {
4719 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4720 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4721 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4722 }
4723
4724 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4725 return off;
4726}
4727
4728
4729/**
4730 * Flush the given set of guest SIMD registers if marked as dirty.
4731 *
4732 * @returns New code buffer offset.
4733 * @param pReNative The native recompile state.
4734 * @param off Current code buffer position.
4735 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
4736 */
4737DECL_HIDDEN_THROW(uint32_t)
4738iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4739{
4740 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4741 & fFlushGstSimdReg;
4742 if (bmGstSimdRegShadowDirty)
4743 {
4744# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4745 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4746 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4747# endif
4748
4749 do
4750 {
4751 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4752 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4753 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4754 } while (bmGstSimdRegShadowDirty);
4755 }
4756
4757 return off;
4758}
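/* Illustrative call (editorial): flushing all dirty guest SIMD registers,
   relying on the UINT64_MAX default documented above:

       off = iemNativeSimdRegFlushDirtyGuest(pReNative, off);
*/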
4759
4760
4761#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4762/**
4763 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
4764 *
4765 * @returns New code buffer offset.
4766 * @param pReNative The native recompile state.
4767 * @param off Current code buffer position.
4768 * @param idxHstSimdReg The host SIMD register.
4769 *
4770 * @note This doesn't do any unshadowing of guest registers from the host register.
4771 */
4772DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
4773{
4774 /* We need to flush any pending guest register writes this host register shadows. */
4775 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4776 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
4777 if (bmGstSimdRegShadowDirty)
4778 {
4779# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4780 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4781 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4782# endif
4783
4784 do
4785 {
4786 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4787 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4788 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4789 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
4790 } while (bmGstSimdRegShadowDirty);
4791 }
4792
4793 return off;
4794}
4795#endif
4796
4797
4798/**
4799 * Locate a register, possibly freeing one up.
4800 *
4801 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4802 * failed.
4803 *
4804 * @returns Host register number on success. Returns UINT8_MAX if no registers
4805 * found; the caller is supposed to deal with this and raise an
4806 * allocation type specific status code (if desired).
4807 *
4808 * @throws VBox status code if we run into trouble spilling a variable or
4809 * recording debug info. Does NOT throw anything if we're out of
4810 * registers, though.
4811 */
4812static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4813 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4814{
4815 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
4816 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4817 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4818
4819 /*
4820 * Try a freed register that's shadowing a guest register.
4821 */
4822 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4823 if (fRegs)
4824 {
4825 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
4826
4827#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4828 /*
4829 * When we have liveness information, we use it to kick out all shadowed
4830 * guest registers that will not be needed any more in this TB. If we're
4831 * lucky, this may prevent us from ending up here again.
4832 *
4833 * Note! We must consider the previous entry here so we don't free
4834 * anything that the current threaded function requires (current
4835 * entry is produced by the next threaded function).
4836 */
4837 uint32_t const idxCurCall = pReNative->idxCurCall;
4838 if (idxCurCall > 0)
4839 {
4840 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4841
4842# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4843 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4844 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4845 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
4846#else
4847 /* Construct a mask of the registers not in the read or write state.
4848 Note! We could skip writes, if they aren't from us, as this is just
4849 a hack to prevent trashing registers that have just been written
4850 or will be written when we retire the current instruction. */
4851 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4852 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4853 & IEMLIVENESSBIT_MASK;
4854#endif
4855 /* If it matches any shadowed registers. */
4856 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4857 {
4858 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
4859 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4860 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4861
4862 /* See if we've got any unshadowed registers we can return now. */
4863 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4864 if (fUnshadowedRegs)
4865 {
4866 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
4867 return (fPreferVolatile
4868 ? ASMBitFirstSetU32(fUnshadowedRegs)
4869 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4870 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4871 - 1;
4872 }
4873 }
4874 }
4875#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4876
4877 unsigned const idxReg = (fPreferVolatile
4878 ? ASMBitFirstSetU32(fRegs)
4879 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4880 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
4881 - 1;
4882
4883 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
4884 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
4885 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4886 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
4887
4888 /* We need to flush any pending guest register writes this host SIMD register shadows. */
4889 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
4890
4891 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4892 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4893 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4894 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4895 return idxReg;
4896 }
4897
4898 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
4899
4900 /*
4901 * Try free up a variable that's in a register.
4902 *
4903 * We do two rounds here, first evacuating variables we don't need to be
4904 * saved on the stack, then in the second round move things to the stack.
4905 */
4906 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
4907 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4908 {
4909 uint32_t fVars = pReNative->Core.bmVars;
4910 while (fVars)
4911 {
4912 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4913 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4914 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
4915 continue;
4916
4917 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
4918 && (RT_BIT_32(idxReg) & fRegMask)
4919 && ( iLoop == 0
4920 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4921 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4922 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4923 {
4924 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
4925 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
4926 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4927 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
4928 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
4929 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
4930
4931 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4932 {
4933 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4934 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4935 }
4936
4937 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4938 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
4939
4940 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4941 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4942 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4943 return idxReg;
4944 }
4945 fVars &= ~RT_BIT_32(idxVar);
4946 }
4947 }
4948
4949 AssertFailed();
4950 return UINT8_MAX;
4951}
4952
4953
4954/**
4955 * Flushes a set of guest register shadow copies.
4956 *
4957 * This is usually done after calling a threaded function or a C-implementation
4958 * of an instruction.
4959 *
4960 * @param pReNative The native recompile state.
4961 * @param fGstSimdRegs Set of guest SIMD registers to flush.
4962 */
4963DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
4964{
4965 /*
4966 * Reduce the mask by what's currently shadowed
4967 */
4968 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
4969 fGstSimdRegs &= bmGstSimdRegShadows;
4970 if (fGstSimdRegs)
4971 {
4972 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
4973 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
4974 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
4975 if (bmGstSimdRegShadowsNew)
4976 {
4977 /*
4978 * Partial.
4979 */
4980 do
4981 {
4982 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4983 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4984 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4985 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4986 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4987 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4988
4989 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
4990 fGstSimdRegs &= ~fInThisHstReg;
4991 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4992 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4993 if (!fGstRegShadowsNew)
4994 {
4995 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4996 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4997 }
4998 } while (fGstSimdRegs != 0);
4999 }
5000 else
5001 {
5002 /*
5003 * Clear all.
5004 */
5005 do
5006 {
5007 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5008 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5009 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5010 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5011 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5012 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5013
5014 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5015 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5016 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5017 } while (fGstSimdRegs != 0);
5018 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5019 }
5020 }
5021}
5022
5023
5024/**
5025 * Allocates a temporary host SIMD register.
5026 *
5027 * This may emit code to save register content onto the stack in order to free
5028 * up a register.
5029 *
5030 * @returns The host register number; throws VBox status code on failure,
5031 * so no need to check the return value.
5032 * @param pReNative The native recompile state.
5033 * @param poff Pointer to the variable with the code buffer position.
5034 * This will be update if we need to move a variable from
5035 * register to stack in order to satisfy the request.
5036 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5037 * registers (@c true, default) or the other way around
5038 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5039 */
5040DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5041{
5042 /*
5043 * Try find a completely unused register, preferably a call-volatile one.
5044 */
5045 uint8_t idxSimdReg;
5046 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5047 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5048 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5049 if (fRegs)
5050 {
5051 if (fPreferVolatile)
5052 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5053 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5054 else
5055 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5056 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5057 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5058 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5059
5060 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5061 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5062 }
5063 else
5064 {
5065 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5066 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5067 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5068 }
5069
5070 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5071 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5072}
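/* Typical lifetime (illustrative sketch, not from the original source):

       uint8_t const idxTmpSimdReg = iemNativeSimdRegAllocTmp(pReNative, &off);
       // ...emit code using idxTmpSimdReg as scratch...
       iemNativeSimdRegFreeTmp(pReNative, idxTmpSimdReg);
*/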
5073
5074
5075/**
5076 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
5077 * registers.
5078 *
5079 * @returns The host register number; throws VBox status code on failure,
5080 * so no need to check the return value.
5081 * @param pReNative The native recompile state.
5082 * @param poff Pointer to the variable with the code buffer position.
5083 * This will be updated if we need to move a variable from
5084 * register to stack in order to satisfy the request.
5085 * @param fRegMask Mask of acceptable registers.
5086 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5087 * registers (@c true, default) or the other way around
5088 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5089 */
5090DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5091 bool fPreferVolatile /*= true*/)
5092{
5093 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5094 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5095
5096 /*
5097 * Try find a completely unused register, preferably a call-volatile one.
5098 */
5099 uint8_t idxSimdReg;
5100 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5101 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5102 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5103 & fRegMask;
5104 if (fRegs)
5105 {
5106 if (fPreferVolatile)
5107 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5108 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5109 else
5110 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5111 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5112 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5113 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5114
5115 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5116 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5117 }
5118 else
5119 {
5120 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5121 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5122 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5123 }
5124
5125 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5126 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5127}
5128
5129
5130/**
5131 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5132 *
5133 * @param pReNative The native recompile state.
5134 * @param idxHstSimdReg The host SIMD register to update the state for.
5135 * @param enmLoadSz The load size to set.
5136 */
5137DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5138 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5139{
5140 /* Everything valid already? -> nothing to do. */
5141 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5142 return;
5143
5144 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5145 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5146 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5147 {
5148 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5149 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5150 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5151 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5152 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5153 }
5154}
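/* Worked example (editorial): if only the low 128 bits are marked as loaded and
   the caller now loads the high 128 bits, the two partial states are merged:

       enmLoaded == kIemNativeGstSimdRegLdStSz_Low128 + enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
           => enmLoaded = kIemNativeGstSimdRegLdStSz_256

   Any other mismatching combination trips the Assert above. */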
5155
5156
5157static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5158 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5159{
5160 /* Easy case first: either the destination loads the same range as the source has already loaded, or the source has loaded everything. */
5161 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5162 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5163 {
5164# ifdef RT_ARCH_ARM64
5165 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5166 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5167# endif
5168
5169 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5170 {
5171 switch (enmLoadSzDst)
5172 {
5173 case kIemNativeGstSimdRegLdStSz_256:
5174 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5175 break;
5176 case kIemNativeGstSimdRegLdStSz_Low128:
5177 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5178 break;
5179 case kIemNativeGstSimdRegLdStSz_High128:
5180 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5181 break;
5182 default:
5183 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5184 }
5185
5186 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5187 }
5188 }
5189 else
5190 {
5191 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5192 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5193 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5194 }
5195
5196 return off;
5197}
5198
5199
5200/**
5201 * Allocates a temporary host SIMD register for keeping a guest
5202 * SIMD register value.
5203 *
5204 * If a host register already holds the guest register value it is reused;
5205 * otherwise code will be emitted to do the loading. Code may also
5206 * be emitted if we have to free up a register to satisfy the request.
5207 *
5208 * @returns The host register number; throws VBox status code on failure, so no
5209 * need to check the return value.
5210 * @param pReNative The native recompile state.
5211 * @param poff Pointer to the variable with the code buffer
5212 * position. This will be updated if we need to move a
5213 * variable from register to stack in order to satisfy
5214 * the request.
5215 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5216 * @param enmIntendedUse How the caller will be using the host register.
5217 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5218 * register is okay (default). The ASSUMPTION here is
5219 * that the caller has already flushed all volatile
5220 * registers, so this is only applied if we allocate a
5221 * new register.
5222 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5223 */
5224DECL_HIDDEN_THROW(uint8_t)
5225iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5226 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5227 bool fNoVolatileRegs /*= false*/)
5228{
5229 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5230#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5231 AssertMsg( pReNative->idxCurCall == 0
5232 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5233 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5234 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5235 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5236 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5237 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5238#endif
5239#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5240 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5241#endif
5242 uint32_t const fRegMask = !fNoVolatileRegs
5243 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5244 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5245
5246 /*
5247 * First check if the guest register value is already in a host register.
5248 */
5249 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5250 {
5251 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5252 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5253 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5254 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5255
5256 /* It's not supposed to be allocated... */
5257 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5258 {
5259 /*
5260 * If the register will trash the guest shadow copy, try find a
5261 * completely unused register we can use instead. If that fails,
5262 * we need to disassociate the host reg from the guest reg.
5263 */
5264 /** @todo would be nice to know if preserving the register is in any way helpful. */
5265 /* If the purpose is calculations, try duplicate the register value as
5266 we'll be clobbering the shadow. */
5267 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5268 && ( ~pReNative->Core.bmHstSimdRegs
5269 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5270 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5271 {
5272 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5273
5274 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5275
5276 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5277 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5278 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5279 idxSimdReg = idxRegNew;
5280 }
5281 /* If the current register matches the restrictions, go ahead and allocate
5282 it for the caller. */
5283 else if (fRegMask & RT_BIT_32(idxSimdReg))
5284 {
5285 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5286 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5287 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5288 {
5289 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5290 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5291 else
5292 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5293 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5294 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5295 }
5296 else
5297 {
5298 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5299 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5300 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5301 }
5302 }
5303 /* Otherwise, allocate a register that satisfies the caller and transfer
5304 the shadowing if compatible with the intended use. (This basically
5305 means the call wants a non-volatile register (RSP push/pop scenario).) */
5306 else
5307 {
5308 Assert(fNoVolatileRegs);
5309 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5310 !fNoVolatileRegs
5311 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5312 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5313 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5314 {
5315 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5316 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transfering %s to %s for guest %s %s\n",
5317 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5318 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5319 }
5320 else
5321 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5322 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5323 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5324 idxSimdReg = idxRegNew;
5325 }
5326 }
5327 else
5328 {
5329 /*
5330 * Oops. Shadowed guest register already allocated!
5331 *
5332 * Allocate a new register, copy the value and, if updating, the
5333 * guest shadow copy assignment to the new register.
5334 */
5335 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5336 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5337 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5338 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5339
5340 /** @todo share register for readonly access. */
5341 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5342 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5343
5344 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5345 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5346 else
5347 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5348
5349 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5350 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5351 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5352 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5353 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5354 else
5355 {
5356 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5357 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5358 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5359 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5360 }
5361 idxSimdReg = idxRegNew;
5362 }
5363 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5364
5365#ifdef VBOX_STRICT
5366 /* Strict builds: Check that the value is correct. */
5367 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5368 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5369#endif
5370
5371 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5372 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5373 {
5374# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5375 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5376 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5377# endif
5378
5379 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5380 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5381 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5382 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5383 else
5384 {
5385 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5386 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5387 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5388 }
5389 }
5390
5391 return idxSimdReg;
5392 }
5393
5394 /*
5395 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5396 */
5397 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5398
5399 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5400 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5401 else
5402 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5403
5404 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5405 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5406
5407 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5408 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5409 {
5410# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5411 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5412 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5413# endif
5414
5415 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5416 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5417 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5418 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5419 else
5420 {
5421 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5422 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5423 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5424 }
5425 }
5426
5427 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5428 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5429
5430 return idxRegNew;
5431}
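/* Illustrative call (editorial sketch; iYRegDst is a hypothetical variable name):

       uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
                                                                             IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                             kIemNativeGstSimdRegLdStSz_256,
                                                                             kIemNativeGstRegUse_ForFullWrite);

   With ForFullWrite no load is emitted and both halves are marked dirty, so the
   value produced by the subsequent code is written back to CPUMCTX by a later flush. */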
5432
5433
5434/**
5435 * Flushes guest SIMD register shadow copies held by a set of host registers.
5436 *
5437 * This is used whenever calling an external helper for ensuring that we don't carry on
5438 * with any guest shadows in volatile registers, as these will get corrupted by the caller.
5439 *
5440 * @param pReNative The native recompile state.
5441 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5442 */
5443DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5444{
5445 /*
5446 * Reduce the mask by what's currently shadowed.
5447 */
5448 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5449 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5450 if (fHstSimdRegs)
5451 {
5452 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5453 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5454 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5455 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5456 if (bmHstSimdRegsWithGstShadowNew)
5457 {
5458 /*
5459 * Partial (likely).
5460 */
5461 uint64_t fGstShadows = 0;
5462 do
5463 {
5464 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5465 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5466 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5467 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5468 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5469 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5470
5471 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5472 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5473 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5474 } while (fHstSimdRegs != 0);
5475 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5476 }
5477 else
5478 {
5479 /*
5480 * Clear all.
5481 */
5482 do
5483 {
5484 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5485 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5486 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5487 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5488 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5489 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5490
5491 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5492 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5493 } while (fHstSimdRegs != 0);
5494 pReNative->Core.bmGstSimdRegShadows = 0;
5495 }
5496 }
5497}
5498#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5499
5500
5501
5502/*********************************************************************************************************************************
5503* Code emitters for flushing pending guest register writes and sanity checks *
5504*********************************************************************************************************************************/
5505
5506#ifdef VBOX_STRICT
5507/**
5508 * Does internal register allocator sanity checks.
5509 */
5510DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5511{
5512 /*
5513 * Iterate host registers building a guest shadowing set.
5514 */
5515 uint64_t bmGstRegShadows = 0;
5516 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5517 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5518 while (bmHstRegsWithGstShadow)
5519 {
5520 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5521 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5522 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5523
5524 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5525 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5526 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5527 bmGstRegShadows |= fThisGstRegShadows;
5528 while (fThisGstRegShadows)
5529 {
5530 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5531 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5532 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5533 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5534 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5535 }
5536 }
5537 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5538 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5539 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5540
5541 /*
5542 * Now the other way around, checking the guest to host index array.
5543 */
5544 bmHstRegsWithGstShadow = 0;
5545 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5546 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5547 while (bmGstRegShadows)
5548 {
5549 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5550 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5551 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5552
5553 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5554 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5555 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5556 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5557 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5558 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5559 }
5560 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5561 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5562 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5563}
5564#endif /* VBOX_STRICT */
5565
5566
5567/**
5568 * Flushes any delayed guest register writes.
5569 *
5570 * This must be called prior to calling CImpl functions and any helpers that use
5571 * the guest state (like raising exceptions) and such.
5572 *
5573 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5574 * the caller if it wishes to do so.
5575 */
5576DECL_HIDDEN_THROW(uint32_t)
5577iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5578{
5579#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5580 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5581 off = iemNativeEmitPcWriteback(pReNative, off);
5582#else
5583 RT_NOREF(pReNative, fGstShwExcept);
5584#endif
5585
5586#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5587 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5588#endif
5589
5590#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5591 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5592#endif
5593
5594 return off;
5595}
5596
5597
5598#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5599/**
5600 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
5601 */
5602DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5603{
5604 Assert(pReNative->Core.offPc);
5605# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5606 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5607 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5608# endif
5609
5610# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5611 /* Allocate a temporary PC register. */
5612 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5613
5614 /* Perform the addition and store the result. */
5615 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5616 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5617
5618 /* Free but don't flush the PC register. */
5619 iemNativeRegFreeTmp(pReNative, idxPcReg);
5620# else
5621 /* Compare the shadow with the context value, they should match. */
5622 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5623 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5624# endif
5625
5626 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5627 pReNative->Core.offPc = 0;
5628 pReNative->Core.cInstrPcUpdateSkipped = 0;
5629
5630 return off;
5631}
5632#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5633
5634
5635/*********************************************************************************************************************************
5636* Code Emitters (larger snippets) *
5637*********************************************************************************************************************************/
5638
5639/**
5640 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5641 * extending to 64-bit width.
5642 *
5643 * @returns New code buffer offset on success, UINT32_MAX on failure.
5644 * @param pReNative The native recompile state.
5645 * @param off The current code buffer position.
5646 * @param idxHstReg The host register to load the guest register value into.
5647 * @param enmGstReg The guest register to load.
5648 *
5649 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5650 * that is something the caller needs to do if applicable.
5651 */
5652DECL_HIDDEN_THROW(uint32_t)
5653iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5654{
5655 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5656 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5657
5658 switch (g_aGstShadowInfo[enmGstReg].cb)
5659 {
5660 case sizeof(uint64_t):
5661 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5662 case sizeof(uint32_t):
5663 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5664 case sizeof(uint16_t):
5665 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5666#if 0 /* not present in the table. */
5667 case sizeof(uint8_t):
5668 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5669#endif
5670 default:
5671 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5672 }
5673}
5674
5675
5676#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5677/**
5678 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5679 *
5680 * @returns New code buffer offset on success, UINT32_MAX on failure.
5681 * @param pReNative The recompiler state.
5682 * @param off The current code buffer position.
5683 * @param idxHstSimdReg The host register to load the guest register value into.
5684 * @param enmGstSimdReg The guest register to load.
5685 * @param enmLoadSz The load size of the register.
5686 *
5687 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5688 * that is something the caller needs to do if applicable.
5689 */
5690DECL_HIDDEN_THROW(uint32_t)
5691iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5692 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5693{
5694 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5695
5696 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5697 switch (enmLoadSz)
5698 {
5699 case kIemNativeGstSimdRegLdStSz_256:
5700 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5701 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5702 case kIemNativeGstSimdRegLdStSz_Low128:
5703 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5704 case kIemNativeGstSimdRegLdStSz_High128:
5705 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5706 default:
5707 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5708 }
5709}
5710#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5711
5712#ifdef VBOX_STRICT
5713
5714/**
5715 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5716 *
5717 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5718 * Trashes EFLAGS on AMD64.
5719 */
5720DECL_HIDDEN_THROW(uint32_t)
5721iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5722{
5723# ifdef RT_ARCH_AMD64
5724 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5725
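    /* A sketch of the AMD64 sequence emitted here: rotate the upper 32 bits of the
       register into the lower half, test them against 0ffffffffh, trap with int3
       if any of them is set, then rotate back so the original value is preserved. */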
5726 /* rol reg64, 32 */
5727 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5728 pbCodeBuf[off++] = 0xc1;
5729 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5730 pbCodeBuf[off++] = 32;
5731
5732 /* test reg32, ffffffffh */
5733 if (idxReg >= 8)
5734 pbCodeBuf[off++] = X86_OP_REX_B;
5735 pbCodeBuf[off++] = 0xf7;
5736 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5737 pbCodeBuf[off++] = 0xff;
5738 pbCodeBuf[off++] = 0xff;
5739 pbCodeBuf[off++] = 0xff;
5740 pbCodeBuf[off++] = 0xff;
5741
5742 /* je/jz +1 */
5743 pbCodeBuf[off++] = 0x74;
5744 pbCodeBuf[off++] = 0x01;
5745
5746 /* int3 */
5747 pbCodeBuf[off++] = 0xcc;
5748
5749 /* rol reg64, 32 */
5750 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5751 pbCodeBuf[off++] = 0xc1;
5752 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5753 pbCodeBuf[off++] = 32;
5754
5755# elif defined(RT_ARCH_ARM64)
5756 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5757 /* lsr tmp0, reg64, #32 */
5758 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5759 /* cbz tmp0, +1 */
5760 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5761 /* brk #0x1100 */
5762 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5763
5764# else
5765# error "Port me!"
5766# endif
5767 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5768 return off;
5769}
5770
5771
5772/**
5773 * Emitting code that checks that the content of register @a idxReg is the same
5774 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5775 * instruction if that's not the case.
5776 *
5777 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5778 * Trashes EFLAGS on AMD64.
5779 */
5780DECL_HIDDEN_THROW(uint32_t)
5781iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5782{
5783#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5784 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5785 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
5786 return off;
5787#endif
5788
5789# ifdef RT_ARCH_AMD64
5790 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5791
5792 /* cmp reg, [mem] */
5793 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5794 {
5795 if (idxReg >= 8)
5796 pbCodeBuf[off++] = X86_OP_REX_R;
5797 pbCodeBuf[off++] = 0x38;
5798 }
5799 else
5800 {
5801 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5802 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5803 else
5804 {
5805 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5806 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5807 else
5808 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5809 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5810 if (idxReg >= 8)
5811 pbCodeBuf[off++] = X86_OP_REX_R;
5812 }
5813 pbCodeBuf[off++] = 0x39;
5814 }
5815 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5816
5817 /* je/jz +1 */
5818 pbCodeBuf[off++] = 0x74;
5819 pbCodeBuf[off++] = 0x01;
5820
5821 /* int3 */
5822 pbCodeBuf[off++] = 0xcc;
5823
5824 /* For values smaller than the register size, we must check that the rest
5825 of the register is all zeros. */
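    /* What the immediate below encodes: 0xffff0000 for a 16-bit guest register,
       0xffffff00 for an 8-bit one; with REX.W it is sign-extended to 64 bits, so
       the TEST covers bits 63:16 respectively 63:8 of the host register. */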
5826 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5827 {
5828 /* test reg64, imm32 */
5829 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5830 pbCodeBuf[off++] = 0xf7;
5831 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5832 pbCodeBuf[off++] = 0;
5833 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5834 pbCodeBuf[off++] = 0xff;
5835 pbCodeBuf[off++] = 0xff;
5836
5837 /* je/jz +1 */
5838 pbCodeBuf[off++] = 0x74;
5839 pbCodeBuf[off++] = 0x01;
5840
5841 /* int3 */
5842 pbCodeBuf[off++] = 0xcc;
5843 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5844 }
5845 else
5846 {
5847 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5848 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5849 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5850 }
5851
5852# elif defined(RT_ARCH_ARM64)
5853 /* mov TMP0, [gstreg] */
5854 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5855
5856 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5857 /* sub tmp0, tmp0, idxReg */
5858 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5859 /* cbz tmp0, +1 */
5860 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5861 /* brk #0x1000+enmGstReg */
5862 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5863 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5864
5865# else
5866# error "Port me!"
5867# endif
5868 return off;
5869}
5870
5871
5872# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5873# ifdef RT_ARCH_AMD64
5874/**
5875 * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
5876 */
5877DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
5878{
5879 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5880 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5881 if (idxSimdReg >= 8)
5882 pbCodeBuf[off++] = X86_OP_REX_R;
5883 pbCodeBuf[off++] = 0x0f;
5884 pbCodeBuf[off++] = 0x38;
5885 pbCodeBuf[off++] = 0x29;
5886 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
5887
5888 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
5889 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5890 pbCodeBuf[off++] = X86_OP_REX_W
5891 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5892 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5893 pbCodeBuf[off++] = 0x0f;
5894 pbCodeBuf[off++] = 0x3a;
5895 pbCodeBuf[off++] = 0x16;
5896 pbCodeBuf[off++] = 0xeb;
5897 pbCodeBuf[off++] = 0x00;
5898
5899 /* cmp tmp0, 0xffffffffffffffff. */
5900 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5901 pbCodeBuf[off++] = 0x83;
5902 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5903 pbCodeBuf[off++] = 0xff;
5904
5905 /* je/jz +1 */
5906 pbCodeBuf[off++] = 0x74;
5907 pbCodeBuf[off++] = 0x01;
5908
5909 /* int3 */
5910 pbCodeBuf[off++] = 0xcc;
5911
5912 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
5913 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5914 pbCodeBuf[off++] = X86_OP_REX_W
5915 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5916 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5917 pbCodeBuf[off++] = 0x0f;
5918 pbCodeBuf[off++] = 0x3a;
5919 pbCodeBuf[off++] = 0x16;
5920 pbCodeBuf[off++] = 0xeb;
5921 pbCodeBuf[off++] = 0x01;
5922
5923 /* cmp tmp0, 0xffffffffffffffff. */
5924 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5925 pbCodeBuf[off++] = 0x83;
5926 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5927 pbCodeBuf[off++] = 0xff;
5928
5929 /* je/jz +1 */
5930 pbCodeBuf[off++] = 0x74;
5931 pbCodeBuf[off++] = 0x01;
5932
5933 /* int3 */
5934 pbCodeBuf[off++] = 0xcc;
5935
5936 return off;
5937}
5938# endif
5939
5940
5941/**
5942 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
5943 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
5944 * instruction if that's not the case.
5945 *
5946 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
5947 * Trashes EFLAGS on AMD64.
5948 */
5949DECL_HIDDEN_THROW(uint32_t)
5950iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
5951 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5952{
5953 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5954 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
5955 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
5956 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5957 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
5958 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5959 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
5960 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5961 return off;
5962
5963# ifdef RT_ARCH_AMD64
5964 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5965 {
5966 /* movdqa vectmp0, idxSimdReg */
5967 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5968
5969 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
5970
5971 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5972 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5973 }
5974
5975 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5976 {
5977 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
5978 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
5979
5980 /* vextracti128 vectmp0, idxSimdReg, 1 */
5981 pbCodeBuf[off++] = X86_OP_VEX3;
5982 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
5983 | X86_OP_VEX3_BYTE1_X
5984 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
5985 | 0x03; /* Opcode map */
5986 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
5987 pbCodeBuf[off++] = 0x39;
5988 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
5989 pbCodeBuf[off++] = 0x01;
5990
5991 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5992 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5993 }
5994# elif defined(RT_ARCH_ARM64)
5995 /* mov vectmp0, [gstreg] */
5996 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
5997
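    /* How the comparison below works (same pattern for both 128-bit halves): EOR
       yields an all-zero vector exactly when the two registers are equal; UADDLV
       sums the 16 unsigned bytes into a scalar that is zero iff every byte is
       zero, which is moved to a GPR so a simple CBZ can skip the BRK. */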
5998 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5999 {
6000 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6001 /* eor vectmp0, vectmp0, idxSimdReg */
6002 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6003 /* uaddlv vectmp0, vectmp0.16B */
6004 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6005 /* umov tmp0, vectmp0.H[0] */
6006 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6007 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6008 /* cbz tmp0, +1 */
6009 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6010 /* brk #0x1000+enmGstSimdReg */
6011 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6012 }
6013
6014 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6015 {
6016 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6017 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6018 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6019 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6020 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6021 /* umov tmp0, (vectmp0 + 1).H[0] */
6022 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6023 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6024 /* cbz tmp0, +1 */
6025 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6026 /* brk #0x1000+enmGstSimdReg */
6027 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6028 }
6029
6030# else
6031# error "Port me!"
6032# endif
6033
6034 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6035 return off;
6036}
6037# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6038
6039
6040/**
6041 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6042 * important bits.
6043 *
6044 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6045 * Trashes EFLAGS on AMD64.
6046 */
6047DECL_HIDDEN_THROW(uint32_t)
6048iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6049{
6050 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6051 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6052 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6053 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6054
6055#ifdef RT_ARCH_AMD64
6056 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6057
6058 /* je/jz +1 */
6059 pbCodeBuf[off++] = 0x74;
6060 pbCodeBuf[off++] = 0x01;
6061
6062 /* int3 */
6063 pbCodeBuf[off++] = 0xcc;
6064
6065# elif defined(RT_ARCH_ARM64)
6066 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6067
6068 /* b.eq +1 */
6069 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6070 /* brk #0x2000 */
6071 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6072
6073# else
6074# error "Port me!"
6075# endif
6076 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6077
6078 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6079 return off;
6080}
6081
6082#endif /* VBOX_STRICT */
6083
6084
6085#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6086/**
6087 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6088 */
6089DECL_HIDDEN_THROW(uint32_t)
6090iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6091{
6092 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6093
6094 fEflNeeded &= X86_EFL_STATUS_BITS;
6095 if (fEflNeeded)
6096 {
6097# ifdef RT_ARCH_AMD64
6098 /* test dword [pVCpu + offVCpu], imm32 */
6099 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6100 if (fEflNeeded <= 0xff)
6101 {
6102 pCodeBuf[off++] = 0xf6;
6103 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6104 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6105 }
6106 else
6107 {
6108 pCodeBuf[off++] = 0xf7;
6109 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6110 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6111 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6112 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6113 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6114 }
6115 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6116
6117# else
6118 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6119 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6120 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6121# ifdef RT_ARCH_ARM64
6122 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6123 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6124# else
6125# error "Port me!"
6126# endif
6127 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6128# endif
6129 }
6130 return off;
6131}
6132#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6133
6134
6135/**
6136 * Emits code for checking the return code of a call and rcPassUp, returning
6137 * from the code if either is non-zero.
6138 */
6139DECL_HIDDEN_THROW(uint32_t)
6140iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6141{
6142#ifdef RT_ARCH_AMD64
6143 /*
6144 * AMD64: eax = call status code.
6145 */
6146
6147 /* edx = rcPassUp */
6148 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6149# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6150 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6151# endif
6152
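    /* ORing the two status values below lets a single conditional branch cover
       both a non-zero return code and a pending rcPassUp. */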
6153 /* edx = eax | rcPassUp */
6154 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6155 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6156 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6157 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6158
6159 /* Jump to non-zero status return path. */
6160 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6161
6162 /* done. */
6163
6164#elif RT_ARCH_ARM64
6165 /*
6166 * ARM64: w0 = call status code.
6167 */
6168# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6169 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6170# endif
6171 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6172
6173 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6174
6175 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6176
6177 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6178 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6179 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6180
6181#else
6182# error "port me"
6183#endif
6184 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6185 RT_NOREF_PV(idxInstr);
6186 return off;
6187}
6188
6189
6190/**
6191 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6192 * raising a \#GP(0) if it isn't.
6193 *
6194 * @returns New code buffer offset, UINT32_MAX on failure.
6195 * @param pReNative The native recompile state.
6196 * @param off The code buffer offset.
6197 * @param idxAddrReg The host register with the address to check.
6198 * @param idxInstr The current instruction.
6199 */
6200DECL_HIDDEN_THROW(uint32_t)
6201iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6202{
6203 /*
6204 * Make sure we don't have any outstanding guest register writes as we may
6205 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6206 */
6207 off = iemNativeRegFlushPendingWrites(pReNative, off);
6208
6209#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6210 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6211#else
6212 RT_NOREF(idxInstr);
6213#endif
6214
6215#ifdef RT_ARCH_AMD64
6216 /*
6217 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6218 * return raisexcpt();
6219 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6220 */
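    /* Why this catches exactly the non-canonical addresses: a canonical address
       has bits 63:47 all equal, i.e. its upper dword is in [0x00000000, 0x00007fff]
       or [0xffff8000, 0xffffffff].  The 32-bit add of 0x8000 maps both ranges into
       [0x0000, 0xffff] (the latter by wrapping around), so the subsequent shift
       right by 16 yields zero iff the address is canonical. */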
6221 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6222
6223 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6224 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6225 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6226 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6227 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeExitReason_RaiseGp0);
6228
6229 iemNativeRegFreeTmp(pReNative, iTmpReg);
6230
6231#elif defined(RT_ARCH_ARM64)
6232 /*
6233 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6234 * return raisexcpt();
6235 * ----
6236 * mov x1, 0x800000000000
6237 * add x1, x0, x1
6238 * cmp xzr, x1, lsr 48
6239 * b.ne .Lraisexcpt
6240 */
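    /* Same reasoning as the AMD64 variant, just done on the full 64-bit value:
       adding 2^47 wraps every canonical address into [0, 2^48 - 1], so bits 63:48
       of the sum are zero exactly for canonical addresses and the LSR #48 compare
       against XZR sets EQ only in that case. */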
6241 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6242
6243 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6244 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6245 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6246 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeExitReason_RaiseGp0);
6247
6248 iemNativeRegFreeTmp(pReNative, iTmpReg);
6249
6250#else
6251# error "Port me"
6252#endif
6253 return off;
6254}
6255
6256
6257/**
6258 * Emits code to check that the content of @a idxAddrReg is within the limit
6259 * of CS, raising a \#GP(0) if it isn't.
6260 *
6261 * @returns New code buffer offset; throws VBox status code on error.
6262 * @param pReNative The native recompile state.
6263 * @param off The code buffer offset.
6264 * @param idxAddrReg The host register (32-bit) with the address to
6265 * check.
6266 * @param idxInstr The current instruction.
6267 */
6268DECL_HIDDEN_THROW(uint32_t)
6269iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6270 uint8_t idxAddrReg, uint8_t idxInstr)
6271{
6272 /*
6273 * Make sure we don't have any outstanding guest register writes as we may
6274 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6275 */
6276 off = iemNativeRegFlushPendingWrites(pReNative, off);
6277
6278#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6279 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6280#else
6281 RT_NOREF(idxInstr);
6282#endif
6283
6284 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6285 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6286 kIemNativeGstRegUse_ReadOnly);
6287
6288 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6289 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6290
6291 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6292 return off;
6293}
6294
6295
6296/**
6297 * Emits a call to a CImpl function or something similar.
6298 */
6299DECL_HIDDEN_THROW(uint32_t)
6300iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6301 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6302{
6303 /* Writeback everything. */
6304 off = iemNativeRegFlushPendingWrites(pReNative, off);
6305
6306 /*
6307 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6308 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6309 */
6310 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6311 fGstShwFlush
6312 | RT_BIT_64(kIemNativeGstReg_Pc)
6313 | RT_BIT_64(kIemNativeGstReg_EFlags));
6314 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6315
6316 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6317
6318 /*
6319 * Load the parameters.
6320 */
6321#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6322 /* Special handling for the hidden VBOXSTRICTRC return pointer. */
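    /* With VBOXSTRICTRC_STRICT_ENABLED on Windows the VBOXSTRICTRC result travels
       via a hidden pointer passed as the first argument (the lea into xCX below),
       which shifts the explicit arguments up by one and pushes the later ones onto
       the stack; the result is read back from the shadow slot after the call. */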
6323 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6324 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6325 if (cAddParams > 0)
6326 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6327 if (cAddParams > 1)
6328 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6329 if (cAddParams > 2)
6330 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6331 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6332
6333#else
6334 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6335 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6336 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6337 if (cAddParams > 0)
6338 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6339 if (cAddParams > 1)
6340 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6341 if (cAddParams > 2)
6342# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6343 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6344# else
6345 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6346# endif
6347#endif
6348
6349 /*
6350 * Make the call.
6351 */
6352 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6353
6354#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6355 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6356#endif
6357
6358 /*
6359 * Check the status code.
6360 */
6361 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6362}
6363
6364
6365/**
6366 * Emits a call to a threaded worker function.
6367 */
6368DECL_HIDDEN_THROW(uint32_t)
6369iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6370{
6371 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6372
6373 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6374 off = iemNativeRegFlushPendingWrites(pReNative, off);
6375
6376 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6377 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6378
6379#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6380 /* The threaded function may throw / long jmp, so set current instruction
6381 number if we're counting. */
6382 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6383#endif
6384
6385 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6386
6387#ifdef RT_ARCH_AMD64
6388 /* Load the parameters and emit the call. */
6389# ifdef RT_OS_WINDOWS
6390# ifndef VBOXSTRICTRC_STRICT_ENABLED
6391 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6392 if (cParams > 0)
6393 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6394 if (cParams > 1)
6395 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6396 if (cParams > 2)
6397 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6398# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6399 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6400 if (cParams > 0)
6401 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6402 if (cParams > 1)
6403 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6404 if (cParams > 2)
6405 {
6406 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6407 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6408 }
6409 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6410# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6411# else
6412 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6413 if (cParams > 0)
6414 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6415 if (cParams > 1)
6416 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6417 if (cParams > 2)
6418 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6419# endif
6420
6421 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6422
6423# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6424 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6425# endif
6426
6427#elif RT_ARCH_ARM64
6428 /*
6429 * ARM64:
6430 */
6431 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6432 if (cParams > 0)
6433 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6434 if (cParams > 1)
6435 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6436 if (cParams > 2)
6437 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6438
6439 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6440
6441#else
6442# error "port me"
6443#endif
6444
6445 /*
6446 * Check the status code.
6447 */
6448 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6449
6450 return off;
6451}
6452
6453#ifdef VBOX_WITH_STATISTICS
6454
6455/**
6456 * Emits code to update the thread call statistics.
6457 */
6458DECL_INLINE_THROW(uint32_t)
6459iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6460{
6461 /*
6462 * Update threaded function stats.
6463 */
6464 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6465 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6466# if defined(RT_ARCH_ARM64)
6467 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6468 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6469 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6470 iemNativeRegFreeTmp(pReNative, idxTmp1);
6471 iemNativeRegFreeTmp(pReNative, idxTmp2);
6472# else
6473 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6474# endif
6475 return off;
6476}
6477
6478
6479/**
6480 * Emits code to update the TB exit reason statistics.
6481 */
6482DECL_INLINE_THROW(uint32_t)
6483iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6484{
6485 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6486 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6487 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6488 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6489 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6490
6491 return off;
6492}
6493
6494#endif /* VBOX_WITH_STATISTICS */
6495
6496/**
6497 * Worker for iemNativeEmitReturnBreakViaLookup.
6498 */
6499static uint32_t iemNativeEmitViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabelReturnBreak,
6500 IEMNATIVELABELTYPE enmLabel, uintptr_t pfnHelper)
6501{
6502 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
6503 if (idxLabel != UINT32_MAX)
6504 {
6505 iemNativeLabelDefine(pReNative, idxLabel, off);
6506
6507 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6508 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6509
6510 /* Jump to ReturnBreak if the return register is NULL. */
6511 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6512 true /*f64Bit*/, idxLabelReturnBreak);
6513
6514 /* Okay, continue executing the next TB. */
6515 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6516 }
6517 return off;
6518}
6519
6520/**
6521 * Emits the code at the ReturnBreakViaLookup, ReturnBreakViaLookupWithIrq,
6522 * ReturnBreakViaLookupWithTlb and ReturnBreakViaLookupWithTlbAndIrq labels
6523 * (returns VINF_IEM_REEXEC_BREAK or jumps to the next TB).
6524 */
6525static uint32_t iemNativeEmitReturnBreakViaLookup(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6526{
6527 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
6528
6529 /*
6530 * The lookup table index is in IEMNATIVE_CALL_ARG1_GREG for all.
6531 * The GCPhysPc is in IEMNATIVE_CALL_ARG2_GREG for ReturnBreakViaLookupWithPc.
6532 */
6533 off = iemNativeEmitViaLookupDoOne(pReNative, off, idxLabelReturnBreak, kIemNativeLabelType_ReturnBreakViaLookup,
6534 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/>);
6535 off = iemNativeEmitViaLookupDoOne(pReNative, off, idxLabelReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
6536 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/>);
6537 off = iemNativeEmitViaLookupDoOne(pReNative, off, idxLabelReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
6538 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/>);
6539 off = iemNativeEmitViaLookupDoOne(pReNative, off, idxLabelReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
6540 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/>);
6541 return off;
6542}
6543
6544
6545/**
6546 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6547 */
6548static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6549{
6550 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6551 if (idxLabel != UINT32_MAX)
6552 {
6553 iemNativeLabelDefine(pReNative, idxLabel, off);
6554 /* set the return status */
6555 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6556 /* jump back to the return sequence. */
6557 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6558 }
6559 return off;
6560}
6561
6562
6563/**
6564 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6565 */
6566static uint32_t iemNativeEmitReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6567{
6568 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreakFF);
6569 if (idxLabel != UINT32_MAX)
6570 {
6571 iemNativeLabelDefine(pReNative, idxLabel, off);
6572 /* set the return status */
6573 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6574 /* jump back to the return sequence. */
6575 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6576 }
6577 return off;
6578}
6579
6580
6581/**
6582 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6583 */
6584static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6585{
6586 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6587 if (idxLabel != UINT32_MAX)
6588 {
6589 iemNativeLabelDefine(pReNative, idxLabel, off);
6590 /* set the return status */
6591 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6592 /* jump back to the return sequence. */
6593 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6594 }
6595 return off;
6596}
6597
6598
6599/**
6600 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6601 */
6602static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6603{
6604 /*
6605 * Generate the rc + rcPassUp fiddling code if needed.
6606 */
6607 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6608 if (idxLabel != UINT32_MAX)
6609 {
6610 iemNativeLabelDefine(pReNative, idxLabel, off);
6611
6612 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6613#ifdef RT_ARCH_AMD64
6614# ifdef RT_OS_WINDOWS
6615# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6616 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6617# endif
6618 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6619 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6620# else
6621 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6622 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6623# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6624 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6625# endif
6626# endif
6627# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6628 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6629# endif
6630
6631#else
6632 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6633 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6634 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6635#endif
6636
6637 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6638 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6639 }
6640 return off;
6641}
6642
6643
6644/**
6645 * Emits a standard epilog.
6646 */
6647static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6648{
6649 *pidxReturnLabel = UINT32_MAX;
6650
6651 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6652 off = iemNativeRegFlushPendingWrites(pReNative, off);
6653
6654 /*
6655 * Successful return, so clear the return register (eax, w0).
6656 */
6657 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
6658 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6659
6660 /*
6661 * Define label for common return point.
6662 */
6663 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6664 *pidxReturnLabel = idxReturn;
6665
6666 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6667
6668#ifdef IEMNATIVE_WITH_RECOMPILER_EPILOGUE_SINGLETON
6669 //off = iemNativeEmitBrk(pReNative, off, 0x7777);
6670 off = iemNativeEmitJmpImm(pReNative, off, (uintptr_t)iemNativeTbEpilog);
6671#else
6672 /*
6673 * Restore registers and return.
6674 */
6675# ifdef RT_ARCH_AMD64
6676 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6677
6678 /* Reposition rsp at the r15 restore point. */
6679 pbCodeBuf[off++] = X86_OP_REX_W;
6680 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6681 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6682 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6683
6684 /* Pop non-volatile registers and return */
6685 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6686 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6687 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6688 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6689 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6690 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6691 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6692 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6693# ifdef RT_OS_WINDOWS
6694 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6695 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6696# endif
6697 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6698 pbCodeBuf[off++] = 0xc9; /* leave */
6699 pbCodeBuf[off++] = 0xc3; /* ret */
6700 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6701
6702# elif RT_ARCH_ARM64
6703 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6704
6705 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6706 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6707 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6708 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6709 IEMNATIVE_FRAME_VAR_SIZE / 8);
6710 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6711 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6712 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6713 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6714 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6715 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6716 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6717 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6718 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6719 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6720 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6721 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6722
6723 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6724 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6725 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6726 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6727
6728 /* retab / ret */
6729# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6730 if (1)
6731 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6732 else
6733# endif
6734 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6735
6736# else
6737# error "port me"
6738# endif
6739 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6740#endif /* IEMNATIVE_WITH_RECOMPILER_EPILOGUE_SINGLETON */
6741
6742 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
6743 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6744
6745 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6746}
6747
6748
6749#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
6750/**
6751 * Emits a standard prolog.
6752 */
6753static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6754{
6755#ifdef RT_ARCH_AMD64
6756 /*
6757 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6758 * reserving 64 bytes for stack variables plus 4 non-register argument
6759 * slots. Fixed register assignment: xBX = pVCpu;
6760 *
6761 * Since we always do the same register spilling, we can use the same
6762 * unwind description for all the code.
6763 */
6764 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6765 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6766 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6767 pbCodeBuf[off++] = 0x8b;
6768 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6769 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6770 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6771# ifdef RT_OS_WINDOWS
6772 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6773 pbCodeBuf[off++] = 0x8b;
6774 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6775 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6776 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6777# else
6778 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6779 pbCodeBuf[off++] = 0x8b;
6780 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6781# endif
6782 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6783 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6784 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6785 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6786 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6787 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6788 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6789 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6790
6791# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6792 /* Save the frame pointer. */
6793 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6794# endif
6795
6796 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6797 X86_GREG_xSP,
6798 IEMNATIVE_FRAME_ALIGN_SIZE
6799 + IEMNATIVE_FRAME_VAR_SIZE
6800 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6801 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6802 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6803 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6804 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6805
6806#elif RT_ARCH_ARM64
6807 /*
6808 * We set up a stack frame exactly like on x86, only we have to push the
6809 * return address ourselves here. We save all non-volatile registers.
6810 */
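    /* A sketch of the register save area set up below, assuming
       IEMNATIVE_FRAME_SAVE_REG_SIZE covers the 12 saved registers (as the
       AssertCompile further down requires); offsets are relative to its base:
            +80: BP, LR     <- the new BP points at the saved BP slot
            +64: x27, x28
            +48: x25, x26
            +32: x23, x24
            +16: x21, x22
            + 0: x19, x20
       with IEMNATIVE_FRAME_VAR_SIZE bytes of variable space allocated below it. */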
6811 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6812
6813 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
6814 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6815 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
6816 * in any way conditional, so just emitting this instruction now and hoping for the best... */
6817 /* pacibsp */
6818 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6819# endif
6820
6821 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6822 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6823 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6824 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6825 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6826 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6827 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6828 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6829 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6830 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6831 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6832 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6833 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6834 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6835 /* Save the BP and LR (ret address) registers at the top of the frame. */
6836 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6837 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6838 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6839 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6840 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6841 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6842
6843 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6844 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
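	    /* Frame layout sketch (added commentary), derived from the stores above and the
	       asserted IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12 (96 bytes), from low to high address:
	           SP + 0x00                                     : variable area (IEMNATIVE_FRAME_VAR_SIZE bytes)
	           SP + IEMNATIVE_FRAME_VAR_SIZE + 0x00 .. 0x4f  : saved x19..x28 (five pairs)
	           SP + IEMNATIVE_FRAME_VAR_SIZE + 0x50 .. 0x5f  : saved BP and LR
	       BP points at the saved BP slot, so BP-relative variable/spill addressing works the
	       same way as with the AMD64 prologue above. */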
6845
6846 /* mov r28, r0 */
6847 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6848 /* mov r27, r1 */
6849 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6850
6851# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6852 /* Save the frame pointer. */
6853 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6854 ARMV8_A64_REG_X2);
6855# endif
6856
6857#else
6858# error "port me"
6859#endif
6860 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6861 return off;
6862}
6863#endif
6864
6865
6866/*********************************************************************************************************************************
6867* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6868*********************************************************************************************************************************/
6869
6870/**
6871 * Internal work that allocates a variable with kind set to
6872 * kIemNativeVarKind_Invalid and no current stack allocation.
6873 *
6874 * The kind will either be set by the caller or later when the variable is first
6875 * assigned a value.
6876 *
6877 * @returns Unpacked index.
6878 * @internal
6879 */
6880static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6881{
6882 Assert(cbType > 0 && cbType <= 64);
6883 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6884 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6885 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6886 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6887 pReNative->Core.aVars[idxVar].cbVar = cbType;
6888 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6889 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6890 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6891 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6892 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6893 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6894 pReNative->Core.aVars[idxVar].u.uValue = 0;
6895#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6896 pReNative->Core.aVars[idxVar].fSimdReg = false;
6897#endif
6898 return idxVar;
6899}
6900
6901
6902/**
6903 * Internal work that allocates an argument variable w/o setting enmKind.
6904 *
6905 * @returns Unpacked index.
6906 * @internal
6907 */
6908static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6909{
6910 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6911 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6912 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6913
6914 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6915 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
6916 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6917 return idxVar;
6918}
6919
6920
6921/**
6922 * Gets the stack slot for a stack variable, allocating one if necessary.
6923 *
6924 * Calling this function implies that the stack slot will contain a valid
6925 * variable value. The caller deals with any register currently assigned to the
6926 * variable, typically by spilling it into the stack slot.
6927 *
6928 * @returns The stack slot number.
6929 * @param pReNative The recompiler state.
6930 * @param idxVar The variable.
6931 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6932 */
6933DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6934{
6935 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6936 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6937 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
6938
6939 /* Already got a slot? */
6940 uint8_t const idxStackSlot = pVar->idxStackSlot;
6941 if (idxStackSlot != UINT8_MAX)
6942 {
6943 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6944 return idxStackSlot;
6945 }
6946
6947 /*
6948 * A single slot is easy to allocate.
6949 * Allocate them from the top end, closest to BP, to reduce the displacement.
6950 */
6951 if (pVar->cbVar <= sizeof(uint64_t))
6952 {
6953 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6954 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6955 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6956 pVar->idxStackSlot = (uint8_t)iSlot;
6957 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
6958 return (uint8_t)iSlot;
6959 }
6960
6961 /*
6962 * We need more than one stack slot.
6963 *
6964 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6965 */
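	    /* Worked example (added commentary): for a 32 byte variable (e.g. an RTUINT256U) the
	       code below computes fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = 3 and
	       fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = 0xf, i.e. four consecutive 8-byte
	       slots starting at a slot index that is a multiple of four. */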
6966 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6967 Assert(pVar->cbVar <= 64);
6968 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
6969 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
6970 uint32_t bmStack = pReNative->Core.bmStack;
6971 while (bmStack != UINT32_MAX)
6972 {
6973 unsigned iSlot = ASMBitLastSetU32(~bmStack);
6974 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6975 iSlot = (iSlot - 1) & ~fBitAlignMask;
6976 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
6977 {
6978 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6979 pVar->idxStackSlot = (uint8_t)iSlot;
6980 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
6981 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
6982 return (uint8_t)iSlot;
6983 }
6984
6985 bmStack |= (fBitAllocMask << iSlot);
6986 }
6987 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6988}
6989
6990
6991/**
6992 * Changes the variable to a stack variable.
6993 *
6994	 * Currently this is only possible to do the first time the variable is used;
6995	 * switching later can be implemented but hasn't been done.
6996 *
6997 * @param pReNative The recompiler state.
6998 * @param idxVar The variable.
6999 * @throws VERR_IEM_VAR_IPE_2
7000 */
7001DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7002{
7003 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7004 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7005 if (pVar->enmKind != kIemNativeVarKind_Stack)
7006 {
7007	        /* We could in theory transition from immediate to stack as well, but it
7008	           would involve the caller doing the work of storing the value on the stack. So,
7009	           till that's required we only allow transition from invalid. */
7010 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7011 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7012 pVar->enmKind = kIemNativeVarKind_Stack;
7013
7014 /* Note! We don't allocate a stack slot here, that's only done when a
7015 slot is actually needed to hold a variable value. */
7016 }
7017}
7018
7019
7020/**
7021 * Sets it to a variable with a constant value.
7022 *
7023 * This does not require stack storage as we know the value and can always
7024 * reload it, unless of course it's referenced.
7025 *
7026 * @param pReNative The recompiler state.
7027 * @param idxVar The variable.
7028 * @param uValue The immediate value.
7029 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7030 */
7031DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7032{
7033 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7034 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7035 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7036 {
7037 /* Only simple transitions for now. */
7038 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7039 pVar->enmKind = kIemNativeVarKind_Immediate;
7040 }
7041 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7042
7043 pVar->u.uValue = uValue;
7044 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7045 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7046 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7047}
7048
7049
7050/**
7051 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7052 *
7053 * This does not require stack storage as we know the value and can always
7054 * reload it. Loading is postponed till needed.
7055 *
7056 * @param pReNative The recompiler state.
7057 * @param idxVar The variable. Unpacked.
7058 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7059 *
7060 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7061 * @internal
7062 */
7063static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7064{
7065 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7066 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7067
7068 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7069 {
7070 /* Only simple transitions for now. */
7071 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7072 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7073 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7074 }
7075 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7076
7077 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7078
7079 /* Update the other variable, ensure it's a stack variable. */
7080 /** @todo handle variables with const values... that'll go boom now. */
7081 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7082 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7083}
7084
7085
7086/**
7087 * Sets the variable to a reference (pointer) to a guest register reference.
7088 *
7089 * This does not require stack storage as we know the value and can always
7090 * reload it. Loading is postponed till needed.
7091 *
7092 * @param pReNative The recompiler state.
7093 * @param idxVar The variable.
7094	 * @param   enmRegClass     The class of guest registers to reference.
7095 * @param idxReg The register within @a enmRegClass to reference.
7096 *
7097 * @throws VERR_IEM_VAR_IPE_2
7098 */
7099DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7100 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7101{
7102 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7103 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7104
7105 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7106 {
7107 /* Only simple transitions for now. */
7108 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7109 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7110 }
7111 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7112
7113 pVar->u.GstRegRef.enmClass = enmRegClass;
7114 pVar->u.GstRegRef.idx = idxReg;
7115}
7116
7117
7118DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7119{
7120 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7121}
7122
7123
7124DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7125{
7126 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7127
7128	    /* Since we're using a generic uint64_t value type, we must truncate it if
7129	       the variable is smaller, otherwise we may end up with too large a value when
7130	       scaling up an imm8 w/ sign-extension.
7131	
7132	       This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7133	       in the BIOS, bx=1) when running on arm, because clang expects 16-bit
7134	       register parameters to have bits 16 and up set to zero. Instead of
7135	       setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7136	       CF value in the result. */
7137 switch (cbType)
7138 {
7139 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7140 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7141 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7142 }
7143 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7144 return idxVar;
7145}
7146
7147
7148DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7149{
7150 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7151 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7152 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7153 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7154 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7155 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7156
7157 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7158 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7159 return idxArgVar;
7160}
7161
7162
7163DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7164{
7165 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7166 /* Don't set to stack now, leave that to the first use as for instance
7167 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7168 return idxVar;
7169}
7170
7171
7172DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7173{
7174 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7175
7176	    /* Since we're using a generic uint64_t value type, we must truncate it if
7177	       the variable is smaller, otherwise we may end up with too large a value when
7178	       scaling up an imm8 w/ sign-extension. */
7179 switch (cbType)
7180 {
7181 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7182 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7183 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7184 }
7185 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7186 return idxVar;
7187}
7188
7189
7190DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
7191{
7192 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7193 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7194
7195 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
7196 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7197
7198 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7199
7200	    /* Truncate the value to this variable's size. */
7201 switch (cbType)
7202 {
7203 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7204 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7205 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7206 }
7207
7208 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7209 iemNativeVarRegisterRelease(pReNative, idxVar);
7210 return idxVar;
7211}
7212
7213
7214/**
7215 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7216 * fixed till we call iemNativeVarRegisterRelease.
7217 *
7218 * @returns The host register number.
7219 * @param pReNative The recompiler state.
7220 * @param idxVar The variable.
7221 * @param poff Pointer to the instruction buffer offset.
7222 * In case a register needs to be freed up or the value
7223 * loaded off the stack.
7224 * @param fInitialized Set if the variable must already have been initialized.
7225 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7226 * the case.
7227 * @param idxRegPref Preferred register number or UINT8_MAX.
7228 */
7229DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7230 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7231{
7232 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7233 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7234 Assert(pVar->cbVar <= 8);
7235 Assert(!pVar->fRegAcquired);
7236
7237 uint8_t idxReg = pVar->idxReg;
7238 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7239 {
7240 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7241 && pVar->enmKind < kIemNativeVarKind_End);
7242 pVar->fRegAcquired = true;
7243 return idxReg;
7244 }
7245
7246 /*
7247 * If the kind of variable has not yet been set, default to 'stack'.
7248 */
7249 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7250 && pVar->enmKind < kIemNativeVarKind_End);
7251 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7252 iemNativeVarSetKindToStack(pReNative, idxVar);
7253
7254 /*
7255	     * We have to allocate a register for the variable, even if it's a stack one,
7256	     * as we don't know if there are modifications being made to it before it's
7257	     * finalized (todo: analyze and insert hints about that?).
7258	     *
7259	     * If we can, we try to get the correct register for argument variables. This
7260	     * is assuming that most argument variables are fetched as close as possible
7261	     * to the actual call, so that there aren't any interfering hidden calls
7262	     * (memory accesses, etc.) in between.
7263	     *
7264	     * If we cannot, or it's an ordinary variable, we make sure no argument registers
7265	     * that will be used by this MC block will be allocated here, and we always
7266	     * prefer non-volatile registers to avoid needing to spill stuff for internal
7267	     * calls.
7268 */
7269 /** @todo Detect too early argument value fetches and warn about hidden
7270 * calls causing less optimal code to be generated in the python script. */
7271
7272 uint8_t const uArgNo = pVar->uArgNo;
7273 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7274 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7275 {
7276 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7277
7278#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7279 /* Writeback any dirty shadow registers we are about to unshadow. */
7280 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7281#endif
7282
7283 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7284 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7285 }
7286 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7287 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7288 {
7289 /** @todo there must be a better way for this and boot cArgsX? */
7290 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7291 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7292 & ~pReNative->Core.bmHstRegsWithGstShadow
7293 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7294 & fNotArgsMask;
7295 if (fRegs)
7296 {
7297 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7298 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7299 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7300 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7301 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7302 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7303 }
7304 else
7305 {
7306 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7307 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7308 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7309 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7310 }
7311 }
7312 else
7313 {
7314 idxReg = idxRegPref;
7315 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7316 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7317 }
7318 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7319 pVar->idxReg = idxReg;
7320
7321#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7322 pVar->fSimdReg = false;
7323#endif
7324
7325 /*
7326 * Load it off the stack if we've got a stack slot.
7327 */
7328 uint8_t const idxStackSlot = pVar->idxStackSlot;
7329 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7330 {
7331 Assert(fInitialized);
7332 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7333 switch (pVar->cbVar)
7334 {
7335 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7336 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7337 case 3: AssertFailed(); RT_FALL_THRU();
7338 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7339 default: AssertFailed(); RT_FALL_THRU();
7340 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7341 }
7342 }
7343 else
7344 {
7345 Assert(idxStackSlot == UINT8_MAX);
7346 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7347 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7348 else
7349 {
7350 /*
7351 * Convert from immediate to stack/register. This is currently only
7352 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7353 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7354 */
7355 AssertStmt(fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7356 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7357 idxVar, idxReg, pVar->u.uValue));
7358 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7359 pVar->enmKind = kIemNativeVarKind_Stack;
7360 }
7361 }
7362
7363 pVar->fRegAcquired = true;
7364 return idxReg;
7365}
7366
7367
7368#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7369/**
7370 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7371 * fixed till we call iemNativeVarRegisterRelease.
7372 *
7373 * @returns The host register number.
7374 * @param pReNative The recompiler state.
7375 * @param idxVar The variable.
7376 * @param poff Pointer to the instruction buffer offset.
7377 * In case a register needs to be freed up or the value
7378 * loaded off the stack.
7379 * @param fInitialized Set if the variable must already have been initialized.
7380 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7381 * the case.
7382 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7383 */
7384DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7385 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7386{
7387 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7388 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7389 Assert( pVar->cbVar == sizeof(RTUINT128U)
7390 || pVar->cbVar == sizeof(RTUINT256U));
7391 Assert(!pVar->fRegAcquired);
7392
7393 uint8_t idxReg = pVar->idxReg;
7394 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7395 {
7396 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7397 && pVar->enmKind < kIemNativeVarKind_End);
7398 pVar->fRegAcquired = true;
7399 return idxReg;
7400 }
7401
7402 /*
7403 * If the kind of variable has not yet been set, default to 'stack'.
7404 */
7405 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7406 && pVar->enmKind < kIemNativeVarKind_End);
7407 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7408 iemNativeVarSetKindToStack(pReNative, idxVar);
7409
7410 /*
7411	     * We have to allocate a register for the variable, even if it's a stack one,
7412	     * as we don't know if there are modifications being made to it before it's
7413	     * finalized (todo: analyze and insert hints about that?).
7414	     *
7415	     * If we can, we try to get the correct register for argument variables. This
7416	     * is assuming that most argument variables are fetched as close as possible
7417	     * to the actual call, so that there aren't any interfering hidden calls
7418	     * (memory accesses, etc.) in between.
7419	     *
7420	     * If we cannot, or it's an ordinary variable, we make sure no argument registers
7421	     * that will be used by this MC block will be allocated here, and we always
7422	     * prefer non-volatile registers to avoid needing to spill stuff for internal
7423	     * calls.
7424 */
7425 /** @todo Detect too early argument value fetches and warn about hidden
7426 * calls causing less optimal code to be generated in the python script. */
7427
7428 uint8_t const uArgNo = pVar->uArgNo;
7429 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7430
7431	    /* SIMD is a bit simpler for now because there is no support for arguments. */
7432 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7433 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7434 {
7435 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7436 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7437 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7438 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7439 & fNotArgsMask;
7440 if (fRegs)
7441 {
7442 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7443 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7444 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7445 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7446 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7447 }
7448 else
7449 {
7450 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7451 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7452 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7453 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7454 }
7455 }
7456 else
7457 {
7458 idxReg = idxRegPref;
7459 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7460 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7461 }
7462 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7463
7464 pVar->fSimdReg = true;
7465 pVar->idxReg = idxReg;
7466
7467 /*
7468 * Load it off the stack if we've got a stack slot.
7469 */
7470 uint8_t const idxStackSlot = pVar->idxStackSlot;
7471 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7472 {
7473 Assert(fInitialized);
7474 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7475 switch (pVar->cbVar)
7476 {
7477 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7478 default: AssertFailed(); RT_FALL_THRU();
7479 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7480 }
7481 }
7482 else
7483 {
7484 Assert(idxStackSlot == UINT8_MAX);
7485 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7486 }
7487 pVar->fRegAcquired = true;
7488 return idxReg;
7489}
7490#endif
7491
7492
7493/**
7494 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7495 * guest register.
7496 *
7497 * This function makes sure there is a register for it and sets it to be the
7498 * current shadow copy of @a enmGstReg.
7499 *
7500 * @returns The host register number.
7501 * @param pReNative The recompiler state.
7502 * @param idxVar The variable.
7503 * @param enmGstReg The guest register this variable will be written to
7504 * after this call.
7505 * @param poff Pointer to the instruction buffer offset.
7506 * In case a register needs to be freed up or if the
7507 * variable content needs to be loaded off the stack.
7508 *
7509 * @note We DO NOT expect @a idxVar to be an argument variable,
7510	 *       because this function is only used in the commit stage of an
7511	 *       instruction.
7512 */
7513DECL_HIDDEN_THROW(uint8_t)
7514iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7515{
7516 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7517 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7518 Assert(!pVar->fRegAcquired);
7519 AssertMsgStmt( pVar->cbVar <= 8
7520 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7521 || pVar->enmKind == kIemNativeVarKind_Stack),
7522 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7523 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7524 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7525
7526 /*
7527 * This shouldn't ever be used for arguments, unless it's in a weird else
7528 * branch that doesn't do any calling and even then it's questionable.
7529 *
7530 * However, in case someone writes crazy wrong MC code and does register
7531 * updates before making calls, just use the regular register allocator to
7532 * ensure we get a register suitable for the intended argument number.
7533 */
7534 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7535
7536 /*
7537 * If there is already a register for the variable, we transfer/set the
7538 * guest shadow copy assignment to it.
7539 */
7540 uint8_t idxReg = pVar->idxReg;
7541 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7542 {
7543#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7544 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7545 {
7546# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7547 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7548 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7549# endif
7550 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7551 }
7552#endif
7553
7554 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7555 {
7556 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7557 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7558 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7559 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7560 }
7561 else
7562 {
7563 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7564 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7565 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7566 }
7567 /** @todo figure this one out. We need some way of making sure the register isn't
7568 * modified after this point, just in case we start writing crappy MC code. */
7569 pVar->enmGstReg = enmGstReg;
7570 pVar->fRegAcquired = true;
7571 return idxReg;
7572 }
7573 Assert(pVar->uArgNo == UINT8_MAX);
7574
7575 /*
7576	     * Because this is supposed to be the commit stage, we just tag along with the
7577 * temporary register allocator and upgrade it to a variable register.
7578 */
7579 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7580 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7581 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7582 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7583 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7584 pVar->idxReg = idxReg;
7585
7586 /*
7587 * Now we need to load the register value.
7588 */
7589 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7590 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7591 else
7592 {
7593 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7594 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7595 switch (pVar->cbVar)
7596 {
7597 case sizeof(uint64_t):
7598 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7599 break;
7600 case sizeof(uint32_t):
7601 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7602 break;
7603 case sizeof(uint16_t):
7604 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7605 break;
7606 case sizeof(uint8_t):
7607 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7608 break;
7609 default:
7610 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7611 }
7612 }
7613
7614 pVar->fRegAcquired = true;
7615 return idxReg;
7616}
7617
7618
7619/**
7620 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7621 *
7622 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7623 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7624 * requirement of flushing anything in volatile host registers when making a
7625 * call.
7626 *
7627 * @returns New @a off value.
7628 * @param pReNative The recompiler state.
7629 * @param off The code buffer position.
7630 * @param fHstRegsNotToSave Set of registers not to save & restore.
7631 */
7632DECL_HIDDEN_THROW(uint32_t)
7633iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7634{
7635 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7636 if (fHstRegs)
7637 {
7638 do
7639 {
7640 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7641 fHstRegs &= ~RT_BIT_32(idxHstReg);
7642
7643 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7644 {
7645 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7646 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7647 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7648 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7649 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7650 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7651 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7652 {
7653 case kIemNativeVarKind_Stack:
7654 {
7655 /* Temporarily spill the variable register. */
7656 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7657 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7658 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7659 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7660 continue;
7661 }
7662
7663 case kIemNativeVarKind_Immediate:
7664 case kIemNativeVarKind_VarRef:
7665 case kIemNativeVarKind_GstRegRef:
7666 /* It is weird to have any of these loaded at this point. */
7667 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7668 continue;
7669
7670 case kIemNativeVarKind_End:
7671 case kIemNativeVarKind_Invalid:
7672 break;
7673 }
7674 AssertFailed();
7675 }
7676 else
7677 {
7678 /*
7679 * Allocate a temporary stack slot and spill the register to it.
7680 */
7681 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7682 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7683 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7684 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7685 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7686 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7687 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7688 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7689 }
7690 } while (fHstRegs);
7691 }
7692#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7693
7694 /*
7695	     * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot
7696	     * allocated, which would be more difficult due to them spanning multiple stack slots and having
7697	     * different sizes (besides, we only have a limited number of slots at the moment).
7698	     *
7699	     * However, the shadows need to be flushed out as the guest SIMD register might get corrupted by
7700	     * the callee. This asserts that the registers were written back earlier and are not in the dirty state.
7701 */
7702 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7703
7704 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7705 if (fHstRegs)
7706 {
7707 do
7708 {
7709 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7710 fHstRegs &= ~RT_BIT_32(idxHstReg);
7711
7712 /* Fixed reserved and temporary registers don't need saving. */
7713 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7714 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7715 continue;
7716
7717 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7718
7719 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7720 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7721 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7722 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7723 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7724 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7725 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7726 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7727 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7728 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7729 {
7730 case kIemNativeVarKind_Stack:
7731 {
7732 /* Temporarily spill the variable register. */
7733 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7734 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7735 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7736 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7737 if (cbVar == sizeof(RTUINT128U))
7738 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7739 else
7740 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7741 continue;
7742 }
7743
7744 case kIemNativeVarKind_Immediate:
7745 case kIemNativeVarKind_VarRef:
7746 case kIemNativeVarKind_GstRegRef:
7747 /* It is weird to have any of these loaded at this point. */
7748 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7749 continue;
7750
7751 case kIemNativeVarKind_End:
7752 case kIemNativeVarKind_Invalid:
7753 break;
7754 }
7755 AssertFailed();
7756 } while (fHstRegs);
7757 }
7758#endif
7759 return off;
7760}
7761
7762
7763/**
7764	 * Emit code to restore volatile registers after a call to a helper.
7765 *
7766 * @returns New @a off value.
7767 * @param pReNative The recompiler state.
7768 * @param off The code buffer position.
7769 * @param fHstRegsNotToSave Set of registers not to save & restore.
7770 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7771 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7772 */
7773DECL_HIDDEN_THROW(uint32_t)
7774iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7775{
7776 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7777 if (fHstRegs)
7778 {
7779 do
7780 {
7781 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7782 fHstRegs &= ~RT_BIT_32(idxHstReg);
7783
7784 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7785 {
7786 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7787 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7788 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7789 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7790 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7791 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7792 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7793 {
7794 case kIemNativeVarKind_Stack:
7795 {
7796 /* Unspill the variable register. */
7797 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7798 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7799 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7800 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7801 continue;
7802 }
7803
7804 case kIemNativeVarKind_Immediate:
7805 case kIemNativeVarKind_VarRef:
7806 case kIemNativeVarKind_GstRegRef:
7807 /* It is weird to have any of these loaded at this point. */
7808 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7809 continue;
7810
7811 case kIemNativeVarKind_End:
7812 case kIemNativeVarKind_Invalid:
7813 break;
7814 }
7815 AssertFailed();
7816 }
7817 else
7818 {
7819 /*
7820 * Restore from temporary stack slot.
7821 */
7822 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7823 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7824 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7825 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7826
7827 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7828 }
7829 } while (fHstRegs);
7830 }
7831#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7832 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7833 if (fHstRegs)
7834 {
7835 do
7836 {
7837 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7838 fHstRegs &= ~RT_BIT_32(idxHstReg);
7839
7840 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7841 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7842 continue;
7843 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7844
7845 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7846 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7847 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7848 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7849 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7850 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7851 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7852 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7853 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7854 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7855 {
7856 case kIemNativeVarKind_Stack:
7857 {
7858 /* Unspill the variable register. */
7859 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7860 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7861 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7862 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7863
7864 if (cbVar == sizeof(RTUINT128U))
7865 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7866 else
7867 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7868 continue;
7869 }
7870
7871 case kIemNativeVarKind_Immediate:
7872 case kIemNativeVarKind_VarRef:
7873 case kIemNativeVarKind_GstRegRef:
7874 /* It is weird to have any of these loaded at this point. */
7875 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7876 continue;
7877
7878 case kIemNativeVarKind_End:
7879 case kIemNativeVarKind_Invalid:
7880 break;
7881 }
7882 AssertFailed();
7883 } while (fHstRegs);
7884 }
7885#endif
7886 return off;
7887}
7888
7889
7890/**
7891 * Worker that frees the stack slots for variable @a idxVar if any allocated.
7892 *
7893 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7894 *
7895 * ASSUMES that @a idxVar is valid and unpacked.
7896 */
7897DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7898{
7899 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7900 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7901 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7902 {
7903 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7904 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7905 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7906 Assert(cSlots > 0);
7907 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7908 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7909 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7910 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7911 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7912 }
7913 else
7914 Assert(idxStackSlot == UINT8_MAX);
7915}
7916
7917
7918/**
7919 * Worker that frees a single variable.
7920 *
7921 * ASSUMES that @a idxVar is valid and unpacked.
7922 */
7923DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7924{
7925 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7926 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7927 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7928
7929 /* Free the host register first if any assigned. */
7930 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7931#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7932 if ( idxHstReg != UINT8_MAX
7933 && pReNative->Core.aVars[idxVar].fSimdReg)
7934 {
7935 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7936 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7937 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
7938 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
7939 }
7940 else
7941#endif
7942 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7943 {
7944 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7945 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7946 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7947 }
7948
7949 /* Free argument mapping. */
7950 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7951 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7952 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7953
7954 /* Free the stack slots. */
7955 iemNativeVarFreeStackSlots(pReNative, idxVar);
7956
7957 /* Free the actual variable. */
7958 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7959 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7960}
7961
7962
7963/**
7964 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7965 */
7966DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7967{
7968 while (bmVars != 0)
7969 {
7970 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7971 bmVars &= ~RT_BIT_32(idxVar);
7972
7973#if 1 /** @todo optimize by simplifying this later... */
7974 iemNativeVarFreeOneWorker(pReNative, idxVar);
7975#else
7976 /* Only need to free the host register, the rest is done as bulk updates below. */
7977 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7978 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7979 {
7980 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7981 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7982 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7983 }
7984#endif
7985 }
7986#if 0 /** @todo optimize by simplifying this later... */
7987 pReNative->Core.bmVars = 0;
7988 pReNative->Core.bmStack = 0;
7989 pReNative->Core.u64ArgVars = UINT64_MAX;
7990#endif
7991}
7992
7993
7994
7995/*********************************************************************************************************************************
7996* Emitters for IEM_MC_CALL_CIMPL_XXX *
7997*********************************************************************************************************************************/
7998
7999/**
8000 * Emits code to load a reference to the given guest register into @a idxGprDst.
8001 */
8002DECL_HIDDEN_THROW(uint32_t)
8003iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8004 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8005{
8006#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8007 /** @todo If we ever gonna allow referencing the RIP register we need to update guest value here. */
8008#endif
8009
8010 /*
8011 * Get the offset relative to the CPUMCTX structure.
8012 */
8013 uint32_t offCpumCtx;
8014 switch (enmClass)
8015 {
8016 case kIemNativeGstRegRef_Gpr:
8017 Assert(idxRegInClass < 16);
8018 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8019 break;
8020
8021	        case kIemNativeGstRegRef_GprHighByte:    /**< AH, CH, DH, BH */
8022 Assert(idxRegInClass < 4);
8023 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8024 break;
8025
8026 case kIemNativeGstRegRef_EFlags:
8027 Assert(idxRegInClass == 0);
8028 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8029 break;
8030
8031 case kIemNativeGstRegRef_MxCsr:
8032 Assert(idxRegInClass == 0);
8033 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8034 break;
8035
8036 case kIemNativeGstRegRef_FpuReg:
8037 Assert(idxRegInClass < 8);
8038 AssertFailed(); /** @todo what kind of indexing? */
8039 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8040 break;
8041
8042 case kIemNativeGstRegRef_MReg:
8043 Assert(idxRegInClass < 8);
8044 AssertFailed(); /** @todo what kind of indexing? */
8045 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8046 break;
8047
8048 case kIemNativeGstRegRef_XReg:
8049 Assert(idxRegInClass < 16);
8050 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8051 break;
8052
8053 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8054 Assert(idxRegInClass == 0);
8055 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8056 break;
8057
8058 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8059 Assert(idxRegInClass == 0);
8060 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8061 break;
8062
8063 default:
8064 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8065 }
8066
8067 /*
8068 * Load the value into the destination register.
8069 */
8070#ifdef RT_ARCH_AMD64
8071 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8072
8073#elif defined(RT_ARCH_ARM64)
8074 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8075 Assert(offCpumCtx < 4096);
8076 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8077
8078#else
8079# error "Port me!"
8080#endif
8081
8082 return off;
8083}
8084
8085
8086/**
8087 * Common code for CIMPL and AIMPL calls.
8088 *
8089	 * These are calls that use argument variables and such. They should not be
8090 * confused with internal calls required to implement an MC operation,
8091 * like a TLB load and similar.
8092 *
8093 * Upon return all that is left to do is to load any hidden arguments and
8094 * perform the call. All argument variables are freed.
8095 *
8096 * @returns New code buffer offset; throws VBox status code on error.
8097 * @param pReNative The native recompile state.
8098 * @param off The code buffer offset.
8099	 * @param   cArgs               The total number of arguments (includes hidden
8100 * count).
8101 * @param cHiddenArgs The number of hidden arguments. The hidden
8102 * arguments must not have any variable declared for
8103 * them, whereas all the regular arguments must
8104 * (tstIEMCheckMc ensures this).
8105	 * @param   fFlushPendingWrites Flag whether to flush pending writes (default true);
8106	 *                              pending writes in call-volatile registers are still flushed even if false.
8107 */
8108DECL_HIDDEN_THROW(uint32_t)
8109iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8110 bool fFlushPendingWrites /*= true*/)
8111{
8112#ifdef VBOX_STRICT
8113 /*
8114 * Assert sanity.
8115 */
8116 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8117 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8118 for (unsigned i = 0; i < cHiddenArgs; i++)
8119 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8120 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8121 {
8122 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8123 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8124 }
8125 iemNativeRegAssertSanity(pReNative);
8126#endif
8127
8128 /* We don't know what the called function makes use of, so flush any pending register writes. */
8129 RT_NOREF(fFlushPendingWrites);
8130#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8131 if (fFlushPendingWrites)
8132#endif
8133 off = iemNativeRegFlushPendingWrites(pReNative, off);
8134
8135 /*
8136 * Before we do anything else, go over variables that are referenced and
8137 * make sure they are not in a register.
8138 */
8139 uint32_t bmVars = pReNative->Core.bmVars;
8140 if (bmVars)
8141 {
8142 do
8143 {
8144 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8145 bmVars &= ~RT_BIT_32(idxVar);
8146
8147 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8148 {
8149 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8150#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8151 if ( idxRegOld != UINT8_MAX
8152 && pReNative->Core.aVars[idxVar].fSimdReg)
8153 {
8154 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8155 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8156
8157 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8158 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8159 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8160 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8161 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8162 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8163 else
8164 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8165
8166 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8167 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8168
8169 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8170 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8171 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8172 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8173 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8174 }
8175 else
8176#endif
8177 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8178 {
8179 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8180 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8181 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8182 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8183 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8184
8185 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8186 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8187 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8188 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8189 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8190 }
8191 }
8192 } while (bmVars != 0);
8193#if 0 //def VBOX_STRICT
8194 iemNativeRegAssertSanity(pReNative);
8195#endif
8196 }
8197
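/* Number of arguments passed in host registers; any remainder is passed on the stack (IEMNATIVE_FP_OFF_STACK_ARG0 configs only). */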
8198 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8199
8200#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8201 /*
8202 * As the very first step, go over the host registers that will be used for arguments
8203 * and make sure they don't shadow anything which needs writing back first.
8204 */
8205 for (uint32_t i = 0; i < cRegArgs; i++)
8206 {
8207 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8208
8209 /* Writeback any dirty guest shadows before using this register. */
8210 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8211 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8212 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8213 }
8214#endif
8215
8216 /*
8217 * First, go over the host registers that will be used for arguments and make
8218 * sure they either hold the desired argument or are free.
8219 */
8220 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8221 {
8222 for (uint32_t i = 0; i < cRegArgs; i++)
8223 {
8224 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8225 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8226 {
8227 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8228 {
8229 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8230 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8231 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8232 Assert(pVar->idxReg == idxArgReg);
8233 uint8_t const uArgNo = pVar->uArgNo;
8234 if (uArgNo == i)
8235 { /* perfect */ }
8236 /* The variable allocator logic should make sure this is impossible,
8237 except for when the return register is used as a parameter (ARM,
8238 but not x86). */
8239#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8240 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8241 {
8242# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8243# error "Implement this"
8244# endif
8245 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8246 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8247 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8248 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8249 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8250 }
8251#endif
8252 else
8253 {
8254 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8255
8256 if (pVar->enmKind == kIemNativeVarKind_Stack)
8257 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8258 else
8259 {
8260 /* just free it, can be reloaded if used again */
8261 pVar->idxReg = UINT8_MAX;
8262 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8263 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8264 }
8265 }
8266 }
8267 else
8268 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8269 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8270 }
8271 }
8272#if 0 //def VBOX_STRICT
8273 iemNativeRegAssertSanity(pReNative);
8274#endif
8275 }
8276
8277 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8278
8279#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8280 /*
8281 * If there are any stack arguments, make sure they are in their place as well.
8282 *
8283 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8284 * the caller) will be loading it later and it must be free (see the first loop).
8285 */
8286 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8287 {
8288 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8289 {
8290 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8291 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8292 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8293 {
8294 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8295 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8296 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8297 pVar->idxReg = UINT8_MAX;
8298 }
8299 else
8300 {
8301 /* Use ARG0 as temp for stuff we need registers for. */
8302 switch (pVar->enmKind)
8303 {
8304 case kIemNativeVarKind_Stack:
8305 {
8306 uint8_t const idxStackSlot = pVar->idxStackSlot;
8307 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8308 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8309 iemNativeStackCalcBpDisp(idxStackSlot));
8310 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8311 continue;
8312 }
8313
8314 case kIemNativeVarKind_Immediate:
8315 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8316 continue;
8317
8318 case kIemNativeVarKind_VarRef:
8319 {
8320 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8321 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8322 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8323 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8324 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8325# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8326 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8327 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8328 if ( fSimdReg
8329 && idxRegOther != UINT8_MAX)
8330 {
8331 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8332 if (cbVar == sizeof(RTUINT128U))
8333 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8334 else
8335 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8336 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8337 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8338 }
8339 else
8340# endif
8341 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8342 {
8343 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8344 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8345 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8346 }
8347 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8348 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8349 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8350 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8351 continue;
8352 }
8353
8354 case kIemNativeVarKind_GstRegRef:
8355 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8356 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8357 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8358 continue;
8359
8360 case kIemNativeVarKind_Invalid:
8361 case kIemNativeVarKind_End:
8362 break;
8363 }
8364 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8365 }
8366 }
8367# if 0 //def VBOX_STRICT
8368 iemNativeRegAssertSanity(pReNative);
8369# endif
8370 }
8371#else
8372 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8373#endif
8374
8375 /*
8376 * Make sure the argument variables are loaded into their respective registers.
8377 *
8378 * We can optimize this by ASSUMING that any register allocations are for
8379 * registers that have already been loaded and are ready. The previous step
8380 * saw to that.
8381 */
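/* Only bother when at least one non-hidden argument register is still unallocated, i.e. its argument has yet to be loaded. */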
8382 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8383 {
8384 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8385 {
8386 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8387 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8388 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8389 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8390 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8391 else
8392 {
8393 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8394 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8395 {
8396 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8397 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8398 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8399 | RT_BIT_32(idxArgReg);
8400 pVar->idxReg = idxArgReg;
8401 }
8402 else
8403 {
8404 /* Use ARG0 as temp for stuff we need registers for. */
8405 switch (pVar->enmKind)
8406 {
8407 case kIemNativeVarKind_Stack:
8408 {
8409 uint8_t const idxStackSlot = pVar->idxStackSlot;
8410 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8411 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8412 continue;
8413 }
8414
8415 case kIemNativeVarKind_Immediate:
8416 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8417 continue;
8418
8419 case kIemNativeVarKind_VarRef:
8420 {
8421 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8422 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8423 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8424 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8425 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8426 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8427#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8428 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8429 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8430 if ( fSimdReg
8431 && idxRegOther != UINT8_MAX)
8432 {
8433 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8434 if (cbVar == sizeof(RTUINT128U))
8435 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8436 else
8437 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8438 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8439 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8440 }
8441 else
8442#endif
8443 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8444 {
8445 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8446 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8447 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8448 }
8449 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8450 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8451 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8452 continue;
8453 }
8454
8455 case kIemNativeVarKind_GstRegRef:
8456 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8457 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8458 continue;
8459
8460 case kIemNativeVarKind_Invalid:
8461 case kIemNativeVarKind_End:
8462 break;
8463 }
8464 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8465 }
8466 }
8467 }
8468#if 0 //def VBOX_STRICT
8469 iemNativeRegAssertSanity(pReNative);
8470#endif
8471 }
8472#ifdef VBOX_STRICT
8473 else
8474 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8475 {
8476 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8477 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8478 }
8479#endif
8480
8481 /*
8482 * Free all argument variables (simplified).
8483 * Their lifetime always expires with the call they are for.
8484 */
8485 /** @todo Make the python script check that arguments aren't used after
8486 * IEM_MC_CALL_XXXX. */
8487 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8488 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8489 * an argument value. There is also some FPU stuff. */
8490 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8491 {
8492 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8493 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8494
8495 /* no need to free registers: */
8496 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8497 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8498 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8499 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8500 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8501 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8502
8503 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8504 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8505 iemNativeVarFreeStackSlots(pReNative, idxVar);
8506 }
8507 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8508
8509 /*
8510 * Flush volatile registers as we make the call.
8511 */
8512 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8513
8514 return off;
8515}
8516
8517
8518
8519/*********************************************************************************************************************************
8520* TLB Lookup. *
8521*********************************************************************************************************************************/
8522
8523/**
8524 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8525 */
8526DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8527{
8528 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8529 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8530 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8531 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8532
8533 /* Do the lookup manually. */
8534 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8535 uint64_t const uTagNoRev = IEMTLB_CALC_TAG_NO_REV(GCPtrFlat);
8536 PCIEMTLBENTRY pTlbe = IEMTLB_TAG_TO_EVEN_ENTRY(&pVCpu->iem.s.DataTlb, uTagNoRev);
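/* Check the even entry against the normal revision first, then the odd entry beside it against the global revision. */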
8537 if (RT_LIKELY( pTlbe->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevision)
8538 || (pTlbe = pTlbe + 1)->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevisionGlobal)))
8539 {
8540 /*
8541 * Check TLB page table level access flags.
8542 */
8543 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
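/* Only CPL 3 needs the PT_NO_USER check: (3 + 1) & 4 yields IEMTLBE_F_PT_NO_USER, lower CPLs yield 0. */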
8544 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8545 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8546 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8547 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8548 | IEMTLBE_F_PG_UNASSIGNED
8549 | IEMTLBE_F_PT_NO_ACCESSED
8550 | fNoWriteNoDirty | fNoUser);
8551 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8552 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8553 {
8554 /*
8555 * Return the address.
8556 */
8557 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8558 if ((uintptr_t)pbAddr == uResult)
8559 return;
8560 RT_NOREF(cbMem);
8561 AssertFailed();
8562 }
8563 else
8564 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8565 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8566 }
8567 else
8568 AssertFailed();
8569 RT_BREAKPOINT();
8570}
8571
8572/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8573
8574
8575
8576/*********************************************************************************************************************************
8577* Recompiler Core. *
8578*********************************************************************************************************************************/
8579
8580/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8581static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8582{
8583 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8584 pDis->cbCachedInstr += cbMaxRead;
8585 RT_NOREF(cbMinRead);
8586 return VERR_NO_DATA;
8587}
8588
8589
8590DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8591{
8592 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8593 {
8594#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8595 ENTRY(fLocalForcedActions),
8596 ENTRY(iem.s.rcPassUp),
8597 ENTRY(iem.s.fExec),
8598 ENTRY(iem.s.pbInstrBuf),
8599 ENTRY(iem.s.uInstrBufPc),
8600 ENTRY(iem.s.GCPhysInstrBuf),
8601 ENTRY(iem.s.cbInstrBufTotal),
8602 ENTRY(iem.s.idxTbCurInstr),
8603 ENTRY(iem.s.fSkippingEFlags),
8604#ifdef VBOX_WITH_STATISTICS
8605 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8606 ENTRY(iem.s.StatNativeTlbHitsForStore),
8607 ENTRY(iem.s.StatNativeTlbHitsForStack),
8608 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8609 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8610 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8611 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8612 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8613#endif
8614 ENTRY(iem.s.DataTlb.uTlbRevision),
8615 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8616 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8617 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8618 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8619 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8620 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8621 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8622 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8623 ENTRY(iem.s.DataTlb.aEntries),
8624 ENTRY(iem.s.CodeTlb.uTlbRevision),
8625 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8626 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8627 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8628 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8629 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8630 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8631 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8632 ENTRY(iem.s.CodeTlb.aEntries),
8633 ENTRY(pVMR3),
8634 ENTRY(cpum.GstCtx.rax),
8635 ENTRY(cpum.GstCtx.ah),
8636 ENTRY(cpum.GstCtx.rcx),
8637 ENTRY(cpum.GstCtx.ch),
8638 ENTRY(cpum.GstCtx.rdx),
8639 ENTRY(cpum.GstCtx.dh),
8640 ENTRY(cpum.GstCtx.rbx),
8641 ENTRY(cpum.GstCtx.bh),
8642 ENTRY(cpum.GstCtx.rsp),
8643 ENTRY(cpum.GstCtx.rbp),
8644 ENTRY(cpum.GstCtx.rsi),
8645 ENTRY(cpum.GstCtx.rdi),
8646 ENTRY(cpum.GstCtx.r8),
8647 ENTRY(cpum.GstCtx.r9),
8648 ENTRY(cpum.GstCtx.r10),
8649 ENTRY(cpum.GstCtx.r11),
8650 ENTRY(cpum.GstCtx.r12),
8651 ENTRY(cpum.GstCtx.r13),
8652 ENTRY(cpum.GstCtx.r14),
8653 ENTRY(cpum.GstCtx.r15),
8654 ENTRY(cpum.GstCtx.es.Sel),
8655 ENTRY(cpum.GstCtx.es.u64Base),
8656 ENTRY(cpum.GstCtx.es.u32Limit),
8657 ENTRY(cpum.GstCtx.es.Attr),
8658 ENTRY(cpum.GstCtx.cs.Sel),
8659 ENTRY(cpum.GstCtx.cs.u64Base),
8660 ENTRY(cpum.GstCtx.cs.u32Limit),
8661 ENTRY(cpum.GstCtx.cs.Attr),
8662 ENTRY(cpum.GstCtx.ss.Sel),
8663 ENTRY(cpum.GstCtx.ss.u64Base),
8664 ENTRY(cpum.GstCtx.ss.u32Limit),
8665 ENTRY(cpum.GstCtx.ss.Attr),
8666 ENTRY(cpum.GstCtx.ds.Sel),
8667 ENTRY(cpum.GstCtx.ds.u64Base),
8668 ENTRY(cpum.GstCtx.ds.u32Limit),
8669 ENTRY(cpum.GstCtx.ds.Attr),
8670 ENTRY(cpum.GstCtx.fs.Sel),
8671 ENTRY(cpum.GstCtx.fs.u64Base),
8672 ENTRY(cpum.GstCtx.fs.u32Limit),
8673 ENTRY(cpum.GstCtx.fs.Attr),
8674 ENTRY(cpum.GstCtx.gs.Sel),
8675 ENTRY(cpum.GstCtx.gs.u64Base),
8676 ENTRY(cpum.GstCtx.gs.u32Limit),
8677 ENTRY(cpum.GstCtx.gs.Attr),
8678 ENTRY(cpum.GstCtx.rip),
8679 ENTRY(cpum.GstCtx.eflags),
8680 ENTRY(cpum.GstCtx.uRipInhibitInt),
8681 ENTRY(cpum.GstCtx.cr0),
8682 ENTRY(cpum.GstCtx.cr4),
8683 ENTRY(cpum.GstCtx.aXcr[0]),
8684 ENTRY(cpum.GstCtx.aXcr[1]),
8685#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8686 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8687 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8688 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8689 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8690 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8691 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8692 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8693 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8694 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8695 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8696 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8697 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8698 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8699 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8700 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8701 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8702 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8703 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8704 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8705 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8706 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8707 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8708 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8709 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8710 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8711 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8712 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8713 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8714 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8715 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8716 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8717 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8718 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8719#endif
8720#undef ENTRY
8721 };
8722#ifdef VBOX_STRICT
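/* Verify once that the member table is sorted by ascending offset, as the binary search below requires. */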
8723 static bool s_fOrderChecked = false;
8724 if (!s_fOrderChecked)
8725 {
8726 s_fOrderChecked = true;
8727 uint32_t offPrev = s_aMembers[0].off;
8728 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8729 {
8730 Assert(s_aMembers[i].off > offPrev);
8731 offPrev = s_aMembers[i].off;
8732 }
8733 }
8734#endif
8735
8736 /*
8737 * Binary lookup.
8738 */
8739 unsigned iStart = 0;
8740 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8741 for (;;)
8742 {
8743 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8744 uint32_t const offCur = s_aMembers[iCur].off;
8745 if (off < offCur)
8746 {
8747 if (iCur != iStart)
8748 iEnd = iCur;
8749 else
8750 break;
8751 }
8752 else if (off > offCur)
8753 {
8754 if (iCur + 1 < iEnd)
8755 iStart = iCur + 1;
8756 else
8757 break;
8758 }
8759 else
8760 return s_aMembers[iCur].pszName;
8761 }
8762#ifdef VBOX_WITH_STATISTICS
8763 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8764 return "iem.s.acThreadedFuncStats[iFn]";
8765#endif
8766 return NULL;
8767}
8768
8769
8770DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8771{
8772 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8773#if defined(RT_ARCH_AMD64)
8774 static const char * const a_apszMarkers[] =
8775 {
8776 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8777 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8778 };
8779#endif
8780
8781 char szDisBuf[512];
8782 DISSTATE Dis;
8783 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8784 uint32_t const cNative = pTb->Native.cInstructions;
8785 uint32_t offNative = 0;
8786#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8787 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8788#endif
8789 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8790 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8791 : DISCPUMODE_64BIT;
8792#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8793 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8794#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8795 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8796#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8797# error "Port me"
8798#else
8799 csh hDisasm = ~(size_t)0;
8800# if defined(RT_ARCH_AMD64)
8801 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8802# elif defined(RT_ARCH_ARM64)
8803 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8804# else
8805# error "Port me"
8806# endif
8807 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8808
8809 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8810 //Assert(rcCs == CS_ERR_OK);
8811#endif
8812
8813 /*
8814 * Print TB info.
8815 */
8816 pHlp->pfnPrintf(pHlp,
8817 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8818 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8819 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8820 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8821#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8822 if (pDbgInfo && pDbgInfo->cEntries > 1)
8823 {
8824 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8825
8826 /*
8827 * This disassembly is driven by the debug info which follows the native
8828 * code and indicates where the next guest instruction starts, where the
8829 * labels are, and such things.
8830 */
8831 uint32_t idxThreadedCall = 0;
8832 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8833 uint8_t idxRange = UINT8_MAX;
8834 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8835 uint32_t offRange = 0;
8836 uint32_t offOpcodes = 0;
8837 uint32_t const cbOpcodes = pTb->cbOpcodes;
8838 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8839 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8840 uint32_t iDbgEntry = 1;
8841 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8842
8843 while (offNative < cNative)
8844 {
8845 /* If we're at or have passed the point where the next chunk of debug
8846 info starts, process it. */
8847 if (offDbgNativeNext <= offNative)
8848 {
8849 offDbgNativeNext = UINT32_MAX;
8850 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8851 {
8852 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8853 {
8854 case kIemTbDbgEntryType_GuestInstruction:
8855 {
8856 /* Did the exec flag change? */
8857 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8858 {
8859 pHlp->pfnPrintf(pHlp,
8860 " fExec change %#08x -> %#08x %s\n",
8861 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8862 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8863 szDisBuf, sizeof(szDisBuf)));
8864 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8865 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8866 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8867 : DISCPUMODE_64BIT;
8868 }
8869
8870 /* New opcode range? We need to fend off a spurious debug info entry here for cases
8871 where the compilation was aborted before the opcode was recorded and the actual
8872 instruction was translated to a threaded call. This may happen when we run out
8873 of ranges, or when some complicated interrupts/FFs are found to be pending or
8874 similar. So, we just deal with it here rather than in the compiler code as it
8875 is a lot simpler to do here. */
8876 if ( idxRange == UINT8_MAX
8877 || idxRange >= cRanges
8878 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8879 {
8880 idxRange += 1;
8881 if (idxRange < cRanges)
8882 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8883 else
8884 continue;
8885 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
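/* idxPhysPage 0 refers to the page the TB starts on; higher indexes select entries in aGCPhysPages. */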
8886 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8887 + (pTb->aRanges[idxRange].idxPhysPage == 0
8888 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8889 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8890 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8891 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8892 pTb->aRanges[idxRange].idxPhysPage);
8893 GCPhysPc += offRange;
8894 }
8895
8896 /* Disassemble the instruction. */
8897 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8898 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8899 uint32_t cbInstr = 1;
8900 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8901 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8902 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8903 if (RT_SUCCESS(rc))
8904 {
8905 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8906 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8907 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8908 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8909
8910 static unsigned const s_offMarker = 55;
8911 static char const s_szMarker[] = " ; <--- guest";
8912 if (cch < s_offMarker)
8913 {
8914 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8915 cch = s_offMarker;
8916 }
8917 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8918 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8919
8920 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8921 }
8922 else
8923 {
8924 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8925 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8926 cbInstr = 1;
8927 }
8928 GCPhysPc += cbInstr;
8929 offOpcodes += cbInstr;
8930 offRange += cbInstr;
8931 continue;
8932 }
8933
8934 case kIemTbDbgEntryType_ThreadedCall:
8935 pHlp->pfnPrintf(pHlp,
8936 " Call #%u to %s (%u args) - %s\n",
8937 idxThreadedCall,
8938 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8939 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8940 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8941 idxThreadedCall++;
8942 continue;
8943
8944 case kIemTbDbgEntryType_GuestRegShadowing:
8945 {
8946 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8947 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8948 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8949 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8950 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8951 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8952 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
8953 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8954 else
8955 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8956 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8957 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8958 continue;
8959 }
8960
8961#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8962 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8963 {
8964 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8965 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8966 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8967 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8968 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8969 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8970 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8971 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8972 else
8973 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8974 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8975 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8976 continue;
8977 }
8978#endif
8979
8980 case kIemTbDbgEntryType_Label:
8981 {
8982 const char *pszName = "what_the_fudge";
8983 const char *pszComment = "";
8984 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8985 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8986 {
8987 case kIemNativeLabelType_Return: pszName = "Return"; break;
8988 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
8989 case kIemNativeLabelType_ReturnBreakFF: pszName = "ReturnBreakFF"; break;
8990 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
8991 case kIemNativeLabelType_ReturnBreakViaLookup: pszName = "ReturnBreakViaLookup"; break;
8992 case kIemNativeLabelType_ReturnBreakViaLookupWithIrq: pszName = "ReturnBreakViaLookupWithIrq"; break;
8993 case kIemNativeLabelType_ReturnBreakViaLookupWithTlb: pszName = "ReturnBreakViaLookupWithTlb"; break;
8994 case kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq: pszName = "ReturnBreakViaLookupWithTlbAndIrq"; break;
8995 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
8996 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
8997 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
8998 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
8999 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
9000 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;
9001 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
9002 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
9003 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
9004 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
9005 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
9006 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
9007 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
9008 case kIemNativeLabelType_If:
9009 pszName = "If";
9010 fNumbered = true;
9011 break;
9012 case kIemNativeLabelType_Else:
9013 pszName = "Else";
9014 fNumbered = true;
9015 pszComment = " ; regs state restored pre-if-block";
9016 break;
9017 case kIemNativeLabelType_Endif:
9018 pszName = "Endif";
9019 fNumbered = true;
9020 break;
9021 case kIemNativeLabelType_CheckIrq:
9022 pszName = "CheckIrq_CheckVM";
9023 fNumbered = true;
9024 break;
9025 case kIemNativeLabelType_TlbLookup:
9026 pszName = "TlbLookup";
9027 fNumbered = true;
9028 break;
9029 case kIemNativeLabelType_TlbMiss:
9030 pszName = "TlbMiss";
9031 fNumbered = true;
9032 break;
9033 case kIemNativeLabelType_TlbDone:
9034 pszName = "TlbDone";
9035 fNumbered = true;
9036 break;
9037 case kIemNativeLabelType_Invalid:
9038 case kIemNativeLabelType_End:
9039 break;
9040 }
9041 if (fNumbered)
9042 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9043 else
9044 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9045 continue;
9046 }
9047
9048 case kIemTbDbgEntryType_NativeOffset:
9049 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9050 Assert(offDbgNativeNext >= offNative);
9051 break;
9052
9053#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9054 case kIemTbDbgEntryType_DelayedPcUpdate:
9055 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9056 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9057 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9058 continue;
9059#endif
9060
9061#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9062 case kIemTbDbgEntryType_GuestRegDirty:
9063 {
9064 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9065 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9066 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9067 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9068 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9069 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9070 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9071 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9072 pszGstReg, pszHstReg);
9073 continue;
9074 }
9075
9076 case kIemTbDbgEntryType_GuestRegWriteback:
9077 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX64)\n",
9078 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9079 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9080 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9081 continue;
9082#endif
9083
9084 default:
9085 AssertFailed();
9086 }
9087 iDbgEntry++;
9088 break;
9089 }
9090 }
9091
9092 /*
9093 * Disassemble the next native instruction.
9094 */
9095 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9096# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9097 uint32_t cbInstr = sizeof(paNative[0]);
9098 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9099 if (RT_SUCCESS(rc))
9100 {
9101# if defined(RT_ARCH_AMD64)
9102 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9103 {
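/* iemNativeEmitMarker encodes its info in the 32-bit value at byte offset 3 of the 7-byte NOP. */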
9104 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9105 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9106 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9107 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9108 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9109 uInfo & 0x8000 ? "recompiled" : "todo");
9110 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9111 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9112 else
9113 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9114 }
9115 else
9116# endif
9117 {
9118 const char *pszAnnotation = NULL;
9119# ifdef RT_ARCH_AMD64
9120 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9121 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9122 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9123 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9124 PCDISOPPARAM pMemOp;
9125 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
9126 pMemOp = &Dis.Param1;
9127 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
9128 pMemOp = &Dis.Param2;
9129 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
9130 pMemOp = &Dis.Param3;
9131 else
9132 pMemOp = NULL;
9133 if ( pMemOp
9134 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9135 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9136 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9137 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9138
9139#elif defined(RT_ARCH_ARM64)
9140 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9141 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9142 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9143# else
9144# error "Port me"
9145# endif
9146 if (pszAnnotation)
9147 {
9148 static unsigned const s_offAnnotation = 55;
9149 size_t const cchAnnotation = strlen(pszAnnotation);
9150 size_t cchDis = strlen(szDisBuf);
9151 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9152 {
9153 if (cchDis < s_offAnnotation)
9154 {
9155 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9156 cchDis = s_offAnnotation;
9157 }
9158 szDisBuf[cchDis++] = ' ';
9159 szDisBuf[cchDis++] = ';';
9160 szDisBuf[cchDis++] = ' ';
9161 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9162 }
9163 }
9164 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9165 }
9166 }
9167 else
9168 {
9169# if defined(RT_ARCH_AMD64)
9170 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9171 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9172# elif defined(RT_ARCH_ARM64)
9173 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9174# else
9175# error "Port me"
9176# endif
9177 cbInstr = sizeof(paNative[0]);
9178 }
9179 offNative += cbInstr / sizeof(paNative[0]);
9180
9181# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9182 cs_insn *pInstr;
9183 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9184 (uintptr_t)pNativeCur, 1, &pInstr);
9185 if (cInstrs > 0)
9186 {
9187 Assert(cInstrs == 1);
9188 const char *pszAnnotation = NULL;
9189# if defined(RT_ARCH_ARM64)
9190 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9191 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9192 {
9193 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9194 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9195 char *psz = strchr(pInstr->op_str, '[');
9196 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9197 {
9198 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx); /* x28 = pVCpu, x27 = &pVCpu->cpum.GstCtx */
9199 int32_t off = -1;
9200 psz += 4;
9201 if (*psz == ']')
9202 off = 0;
9203 else if (*psz == ',')
9204 {
9205 psz = RTStrStripL(psz + 1);
9206 if (*psz == '#')
9207 off = RTStrToInt32(&psz[1]);
9208 /** @todo deal with index registers and LSL as well... */
9209 }
9210 if (off >= 0)
9211 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9212 }
9213 }
9214# endif
9215
9216 size_t const cchOp = strlen(pInstr->op_str);
9217# if defined(RT_ARCH_AMD64)
9218 if (pszAnnotation)
9219 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9220 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9221 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9222 else
9223 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9224 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9225
9226# else
9227 if (pszAnnotation)
9228 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9229 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9230 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9231 else
9232 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9233 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9234# endif
9235 offNative += pInstr->size / sizeof(*pNativeCur);
9236 cs_free(pInstr, cInstrs);
9237 }
9238 else
9239 {
9240# if defined(RT_ARCH_AMD64)
9241 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9242 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9243# else
9244 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9245# endif
9246 offNative++;
9247 }
9248# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9249 }
9250 }
9251 else
9252#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9253 {
9254 /*
9255 * No debug info, just disassemble the x86 code and then the native code.
9256 *
9257 * First the guest code:
9258 */
9259 for (unsigned i = 0; i < pTb->cRanges; i++)
9260 {
9261 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9262 + (pTb->aRanges[i].idxPhysPage == 0
9263 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9264 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9265 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9266 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9267 unsigned off = pTb->aRanges[i].offOpcodes;
9268 /** @todo this ain't working when crossing pages! */
9269 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9270 while (off < cbOpcodes)
9271 {
9272 uint32_t cbInstr = 1;
9273 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9274 &pTb->pabOpcodes[off], cbOpcodes - off,
9275 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9276 if (RT_SUCCESS(rc))
9277 {
9278 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9279 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9280 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9281 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9282 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9283 GCPhysPc += cbInstr;
9284 off += cbInstr;
9285 }
9286 else
9287 {
9288 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9289 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9290 break;
9291 }
9292 }
9293 }
9294
9295 /*
9296 * Then the native code:
9297 */
9298 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9299 while (offNative < cNative)
9300 {
9301 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9302# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9303 uint32_t cbInstr = sizeof(paNative[0]);
9304 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9305 if (RT_SUCCESS(rc))
9306 {
9307# if defined(RT_ARCH_AMD64)
9308 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9309 {
9310 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9311 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9312 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9313 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9314 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9315 uInfo & 0x8000 ? "recompiled" : "todo");
9316 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9317 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9318 else
9319 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9320 }
9321 else
9322# endif
9323 {
9324# ifdef RT_ARCH_AMD64
9325 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9326 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9327 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9328 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9329# elif defined(RT_ARCH_ARM64)
9330 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9331 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9332 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9333# else
9334# error "Port me"
9335# endif
9336 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9337 }
9338 }
9339 else
9340 {
9341# if defined(RT_ARCH_AMD64)
9342 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9343 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9344# else
9345 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9346# endif
9347 cbInstr = sizeof(paNative[0]);
9348 }
9349 offNative += cbInstr / sizeof(paNative[0]);
9350
9351# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9352 cs_insn *pInstr;
9353 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9354 (uintptr_t)pNativeCur, 1, &pInstr);
9355 if (cInstrs > 0)
9356 {
9357 Assert(cInstrs == 1);
9358# if defined(RT_ARCH_AMD64)
9359 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9360 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9361# else
9362 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9363 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9364# endif
9365 offNative += pInstr->size / sizeof(*pNativeCur);
9366 cs_free(pInstr, cInstrs);
9367 }
9368 else
9369 {
9370# if defined(RT_ARCH_AMD64)
9371 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9372 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9373# else
9374 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9375# endif
9376 offNative++;
9377 }
9378# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9379 }
9380 }
9381
9382#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9383 /* Cleanup. */
9384 cs_close(&hDisasm);
9385#endif
9386}
9387
9388
9389/**
9390 * Recompiles the given threaded TB into a native one.
9391 *
9392 * In case of failure the translation block will be returned as-is.
9393 *
9394 * @returns pTb.
9395 * @param pVCpu The cross context virtual CPU structure of the calling
9396 * thread.
9397 * @param pTb The threaded translation block to recompile to native.
9398 */
9399DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9400{
9401#if 0 /* For profiling the native recompiler code. */
9402l_profile_again:
9403#endif
9404 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9405
9406 /*
9407 * The first time thru, we allocate the recompiler state; the other times
9408 * we just need to reset it before using it again.
9409 */
9410 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9411 if (RT_LIKELY(pReNative))
9412 iemNativeReInit(pReNative, pTb);
9413 else
9414 {
9415 pReNative = iemNativeInit(pVCpu, pTb);
9416 AssertReturn(pReNative, pTb);
9417 }
9418
9419#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9420 /*
9421 * First do liveness analysis. This is done backwards.
9422 */
9423 {
9424 uint32_t idxCall = pTb->Thrd.cCalls;
9425 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9426 { /* likely */ }
9427 else
9428 {
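/* Grow the liveness entry array: start at 4K entries and keep doubling until all calls fit. */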
9429 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9430 while (idxCall > cAlloc)
9431 cAlloc *= 2;
9432 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9433 AssertReturn(pvNew, pTb);
9434 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9435 pReNative->cLivenessEntriesAlloc = cAlloc;
9436 }
9437 AssertReturn(idxCall > 0, pTb);
9438 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9439
9440 /* The initial (final) entry. */
9441 idxCall--;
9442 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9443
9444 /* Loop backwards thru the calls and fill in the other entries. */
9445 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9446 while (idxCall > 0)
9447 {
9448 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9449 if (pfnLiveness)
9450 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9451 else
9452 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9453 pCallEntry--;
9454 idxCall--;
9455 }
9456
9457# ifdef VBOX_WITH_STATISTICS
9458 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9459 to 'clobbered' rather than 'input'. */
9460 /** @todo */
9461# endif
9462 }
9463#endif
9464
9465 /*
9466 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9467 * for aborting if an error happens.
9468 */
9469 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9470#ifdef LOG_ENABLED
9471 uint32_t const cCallsOrg = cCallsLeft;
9472#endif
9473 uint32_t off = 0;
9474 int rc = VINF_SUCCESS;
9475 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9476 {
9477#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
9478 /*
9479 * Emit prolog code (fixed).
9480 */
9481 off = iemNativeEmitProlog(pReNative, off);
9482#endif
9483
9484 /*
9485 * Convert the calls to native code.
9486 */
9487#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9488 int32_t iGstInstr = -1;
9489#endif
9490#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9491 uint32_t cThreadedCalls = 0;
9492 uint32_t cRecompiledCalls = 0;
9493#endif
9494#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9495 uint32_t idxCurCall = 0;
9496#endif
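/* Walk the threaded call entries and emit native code for each, falling back to an emitted call to the threaded function when no recompiler exists for it. */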
9497 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9498 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9499 while (cCallsLeft-- > 0)
9500 {
9501 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9502#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9503 pReNative->idxCurCall = idxCurCall;
9504#endif
9505
9506 /*
9507 * Debug info, assembly markup and statistics.
9508 */
9509#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9510 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9511 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9512#endif
9513#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9514 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9515 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9516 {
9517 if (iGstInstr < (int32_t)pTb->cInstructions)
9518 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9519 else
9520 Assert(iGstInstr == pTb->cInstructions);
9521 iGstInstr = pCallEntry->idxInstr;
9522 }
9523 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9524#endif
9525#if defined(VBOX_STRICT)
9526 off = iemNativeEmitMarker(pReNative, off,
9527 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9528#endif
9529#if defined(VBOX_STRICT)
9530 iemNativeRegAssertSanity(pReNative);
9531#endif
9532#ifdef VBOX_WITH_STATISTICS
9533 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9534#endif
9535
9536 /*
9537 * Actual work.
9538 */
9539 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9540 pfnRecom ? "(recompiled)" : "(todo)"));
9541 if (pfnRecom) /** @todo stats on this. */
9542 {
9543 off = pfnRecom(pReNative, off, pCallEntry);
9544 STAM_REL_STATS({cRecompiledCalls++;});
9545 }
9546 else
9547 {
9548 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9549 STAM_REL_STATS({cThreadedCalls++;});
9550 }
9551 Assert(off <= pReNative->cInstrBufAlloc);
9552 Assert(pReNative->cCondDepth == 0);
9553
9554#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9555 if (LogIs2Enabled())
9556 {
9557 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9558# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9559 static const char s_achState[] = "CUXI";
9560# else
9561 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9562# endif
9563
9564 char szGpr[17];
9565 for (unsigned i = 0; i < 16; i++)
9566 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9567 szGpr[16] = '\0';
9568
9569 char szSegBase[X86_SREG_COUNT + 1];
9570 char szSegLimit[X86_SREG_COUNT + 1];
9571 char szSegAttrib[X86_SREG_COUNT + 1];
9572 char szSegSel[X86_SREG_COUNT + 1];
9573 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9574 {
9575 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9576 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9577 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9578 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9579 }
9580 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9581 = szSegSel[X86_SREG_COUNT] = '\0';
9582
9583 char szEFlags[8];
9584 for (unsigned i = 0; i < 7; i++)
9585 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9586 szEFlags[7] = '\0';
9587
9588 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9589 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9590 }
9591#endif
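/* The liveness dump above prints one state character per tracked guest register slot
   (16 GPRs, the four per-segment-register arrays for base/attrib/limit/sel, and the
   7 EFLAGS slots), each taken from s_achState via
   iemNativeLivenessGetStateByGstRegEx() for the entry the liveness analysis recorded
   at this call index. */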
9592
9593 /*
9594 * Advance.
9595 */
9596 pCallEntry++;
9597#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9598 idxCurCall++;
9599#endif
9600 }
9601
9602 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9603 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9604 if (!cThreadedCalls)
9605 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9606
9607#ifdef VBOX_WITH_STATISTICS
9608 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
9609#endif
9610
9611 /*
9612 * Emit the epilog code.
9613 */
9614 uint32_t idxReturnLabel;
9615 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9616
9617 /*
9618 * Generate special jump labels.
9619 */
9620 if (pReNative->bmLabelTypes & ( RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookup)
9621 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithIrq)
9622 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlb)
9623 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq) ))
9624 off = iemNativeEmitReturnBreakViaLookup(pReNative, off); /* Must come before ReturnBreak! */
9625
9626 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9627 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9628
9629 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreakFF))
9630 off = iemNativeEmitReturnBreakFF(pReNative, off, idxReturnLabel);
9631
9632 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9633 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
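/* Each of the out-of-line exit sequences above is only generated if some instruction
   in the TB actually requested the corresponding label, which is what the
   bmLabelTypes bitmap records; the ReturnBreakViaLookup variants are emitted first,
   presumably because they chain into the plain ReturnBreak code (hence the
   "Must come before ReturnBreak!" remark). */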
9634
9635 /*
9636 * Generate simple TB tail labels that just call a helper with a pVCpu
9637 * arg and either return or longjmp/throw a non-zero status.
9638 *
9639 * The array entries must be ordered by enmLabel value so we can index
9640 * using fTailLabels bit numbers.
9641 */
9642 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9643 static struct
9644 {
9645 IEMNATIVELABELTYPE enmLabel;
9646 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9647 } const g_aSimpleTailLabels[] =
9648 {
9649 { kIemNativeLabelType_Invalid, NULL },
9650 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9651 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9652 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9653 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9654 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
9655 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9656 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9657 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9658 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9659 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9660 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9661 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9662 };
9663
9664 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9665 AssertCompile(kIemNativeLabelType_Invalid == 0);
9666 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
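/* Mask arithmetic: RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U sets bits
   1..LastSimple only - subtracting 2 rather than 1 also clears bit 0, so
   kIemNativeLabelType_Invalid (== 0) can never turn up in the scan below. */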
9667 if (fTailLabels)
9668 {
9669 do
9670 {
9671 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9672 fTailLabels &= ~RT_BIT_64(enmLabel);
9673 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9674
9675 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9676 Assert(idxLabel != UINT32_MAX);
9677 if (idxLabel != UINT32_MAX)
9678 {
9679 iemNativeLabelDefine(pReNative, idxLabel, off);
9680
9681 /* int pfnCallback(PVMCPUCC pVCpu) */
9682 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9683 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9684
9685 /* jump back to the return sequence. */
9686 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9687 }
9688
9689 } while (fTailLabels);
9690 }
9691 }
9692 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9693 {
9694 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9695 return pTb;
9696 }
9697 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
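/* Any emitter that runs out of buffer space, labels, registers and the like bails out
   via longjmp with a status code; the catch block above just logs it and returns the
   original pTb, presumably so the caller keeps executing the still-valid threaded
   translation instead of a half-built native one. */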
9698 Assert(off <= pReNative->cInstrBufAlloc);
9699
9700 /*
9701 * Make sure all labels have been defined.
9702 */
9703 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9704#ifdef VBOX_STRICT
9705 uint32_t const cLabels = pReNative->cLabels;
9706 for (uint32_t i = 0; i < cLabels; i++)
9707 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9708#endif
9709
9710#if 0 /* For profiling the native recompiler code. */
9711 if (pTb->Thrd.cCalls >= 136)
9712 {
9713 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9714 goto l_profile_again;
9715 }
9716#endif
9717
9718 /*
9719 * Allocate executable memory, copy over the code we've generated.
9720 */
9721 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9722 if (pTbAllocator->pDelayedFreeHead)
9723 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9724
9725 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
9726 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb, (void **)&paFinalInstrBufRx);
9727 AssertReturn(paFinalInstrBuf, pTb);
9728 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
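/* The allocator hands back two views of one allocation: paFinalInstrBuf is the
   writable mapping that receives the copied code and the fixups below, while
   paFinalInstrBufRx is the read+execute alias stored in the TB - presumably a
   W^X-style split so the executable mapping never has to be writable. */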
9729
9730 /*
9731 * Apply fixups.
9732 */
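/* A fixup records where a relative branch/immediate was emitted (off), the label it
   targets (idxLabel) and an addend; with all label offsets now final, the
   displacement paLabels[idxLabel].off - paFixups[i].off + offAddend is computed in
   IEMNATIVEINSTR units and patched into the instruction encoding in the switch
   below. */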
9733 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9734 uint32_t const cFixups = pReNative->cFixups;
9735 for (uint32_t i = 0; i < cFixups; i++)
9736 {
9737 Assert(paFixups[i].off < off);
9738 Assert(paFixups[i].idxLabel < cLabels);
9739 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9740 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9741 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9742 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9743 switch (paFixups[i].enmType)
9744 {
9745#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9746 case kIemNativeFixupType_Rel32:
9747 Assert(paFixups[i].off + 4 <= off);
9748 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9749 continue;
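/* On AMD64 these offsets are byte offsets (IEMNATIVEINSTR being byte-sized there is
   an assumption based on the recompiler headers), and since x86 rel32 displacements
   are taken from the end of the instruction, the recorded offAddend presumably
   carries the adjustment (e.g. -4 for a trailing 32-bit immediate). */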
9750
9751#elif defined(RT_ARCH_ARM64)
9752 case kIemNativeFixupType_RelImm26At0:
9753 {
9754 Assert(paFixups[i].off < off);
9755 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9756 Assert(offDisp >= -262144 && offDisp < 262144);
9757 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9758 continue;
9759 }
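/* Worked example with hypothetical offsets: a B/BL at instruction index 100 targeting
   a label at index 40 gives offDisp = -60; (uint32_t)-60 & 0x03ffffff = 0x03ffffc4,
   which lands in the low 26 bits while the opcode bits 31:26 are preserved by the
   0xfc000000 mask.  The CPU scales imm26 by the 4-byte instruction size, so the
   displacement stays in instruction units here. */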
9760
9761 case kIemNativeFixupType_RelImm19At5:
9762 {
9763 Assert(paFixups[i].off < off);
9764 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9765 Assert(offDisp >= -262144 && offDisp < 262144);
9766 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9767 continue;
9768 }
9769
9770 case kIemNativeFixupType_RelImm14At5:
9771 {
9772 Assert(paFixups[i].off < off);
9773 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9774 Assert(offDisp >= -8192 && offDisp < 8192);
9775 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9776 continue;
9777 }
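/* The At5 forms match the AArch64 conditional-branch (imm19) and test-and-branch
   (imm14) encodings: the signed word displacement is shifted into bits 23:5 resp.
   18:5, which is exactly the field the 0xff00001f / 0xfff8001f masks clear before
   OR-ing in the new value, and the Assert ranges are the signed 19-/14-bit limits
   (+/-2^18 resp. +/-2^13 instructions). */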
9778
9779#endif
9780 case kIemNativeFixupType_Invalid:
9781 case kIemNativeFixupType_End:
9782 break;
9783 }
9784 AssertFailed();
9785 }
9786
9787 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
9788 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
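/* iemExecMemAllocatorReadyForUse() presumably performs whatever is needed to make the
   just-written range executable - e.g. instruction cache maintenance on ARM64 and/or
   flipping the mapping protection - before the TB is switched over to native
   execution below. */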
9789
9790 /*
9791 * Convert the translation block.
9792 */
9793 RTMemFree(pTb->Thrd.paCalls);
9794 pTb->Native.paInstructions = paFinalInstrBufRx;
9795 pTb->Native.cInstructions = off;
9796 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9797#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9798 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9799 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9800#endif
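/* From here on the TB is a native one: the threaded call table has been freed,
   paInstructions/cInstructions describe the generated code, and the type bits in
   fFlags now say IEMTB_F_TYPE_NATIVE, so the executor runs the recompiled code
   directly on the next use of this TB. */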
9801
9802 Assert(pTbAllocator->cThreadedTbs > 0);
9803 pTbAllocator->cThreadedTbs -= 1;
9804 pTbAllocator->cNativeTbs += 1;
9805 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9806
9807#ifdef LOG_ENABLED
9808 /*
9809 * Disassemble to the log if enabled.
9810 */
9811 if (LogIs3Enabled())
9812 {
9813 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9814 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9815# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9816 RTLogFlush(NULL);
9817# endif
9818 }
9819#endif
9820 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9821
9822 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9823 return pTb;
9824}
9825