VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 105414

Last change on this file since 105414 was 105318, checked in by vboxsync, 7 months ago

VMM/IEM: Tweak for really large TLBs; save an instruction on ARM64 and'ing with 32-bit constants that aren't more than 16 bits wide; update the iemNativeHlpCheckTlbLookup code and made it work on ARM64. bugref:10687

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 449.3 KB
1/* $Id: IEMAllN8veRecompiler.cpp 105318 2024-07-13 00:53:36Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include <VBox/vmm/tm.h>
55#include "IEMInternal.h"
56#include <VBox/vmm/vmcc.h>
57#include <VBox/log.h>
58#include <VBox/err.h>
59#include <VBox/dis.h>
60#include <VBox/param.h>
61#include <iprt/assert.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
71# include "/opt/local/include/capstone/capstone.h"
72#endif
73
74#include "IEMInline.h"
75#include "IEMThreadedFunctions.h"
76#include "IEMN8veRecompiler.h"
77#include "IEMN8veRecompilerEmit.h"
78#include "IEMN8veRecompilerTlbLookup.h"
79#include "IEMNativeFunctions.h"
80
81
82/*
83 * Narrow down configs here to avoid wasting time on unused configs.
84 * Note! Same checks in IEMAllThrdRecompiler.cpp.
85 */
86
87#ifndef IEM_WITH_CODE_TLB
88# error The code TLB must be enabled for the recompiler.
89#endif
90
91#ifndef IEM_WITH_DATA_TLB
92# error The data TLB must be enabled for the recompiler.
93#endif
94
95#ifndef IEM_WITH_SETJMP
96# error The setjmp approach must be enabled for the recompiler.
97#endif
98
99/** @todo eliminate this clang build hack. */
100#if RT_CLANG_PREREQ(4, 0)
101# pragma GCC diagnostic ignored "-Wunused-function"
102#endif
103
104
105/*********************************************************************************************************************************
106* Internal Functions *
107*********************************************************************************************************************************/
108#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
109static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
110#endif
111DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
112DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
113 IEMNATIVEGSTREG enmGstReg, uint32_t off);
114DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
115static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode = false);
116
117
118
119/*********************************************************************************************************************************
120* Native Recompilation *
121*********************************************************************************************************************************/
122
123
124/**
125 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
126 */
127IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
128{
129 pVCpu->iem.s.cInstructions += idxInstr;
130 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
131}
132
133
134/**
135 * Helper for iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb.
136 */
137DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
138{
139 uint64_t fCpu = pVCpu->fLocalForcedActions;
140 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
141 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
142 | VMCPU_FF_TLB_FLUSH
143 | VMCPU_FF_UNHALT );
144 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
145 if (RT_LIKELY( ( !fCpu
146 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
147 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
148 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
149 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
150 return false;
151 return true;
152}
153
154
155/**
156 * Used by TB code to look up the next TB in the TB lookup table and link directly to it, given an already computed physical PC (GCPhysPc).
157 */
158template <bool const a_fWithIrqCheck>
159IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
160 uint32_t fFlags, RTGCPHYS GCPhysPc))
161{
162 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
163 Assert(idxTbLookup < pTb->cTbLookupEntries);
164 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
165#if 1
166 PIEMTB const pNewTb = *ppNewTb;
167 if (pNewTb)
168 {
169# ifdef VBOX_STRICT
170 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
171 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
172 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
173 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
174 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
175 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
176# endif
177 if (pNewTb->GCPhysPc == GCPhysPc)
178 {
179# ifdef VBOX_STRICT
180 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
181 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
182 fAssertFlags |= IEMTB_F_INHIBIT_SHADOW;
183 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
184 fAssertFlags |= IEMTB_F_INHIBIT_NMI;
185# if 1 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. */
186 Assert(IEM_F_MODE_X86_IS_FLAT(fFlags));
187# else
188 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
189 {
190 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
191 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
192 fAssertFlags |= IEMTB_F_CS_LIM_CHECKS;
193 }
194# endif
195 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
196 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
197 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
198#endif
199
200 /*
201 * Check the flags + type.
202 */
203 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
204 {
205 /*
206 * Check for interrupts and stuff.
207 */
208 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
209 * The main problems are the statistics and to some degree the logging. :/ */
210 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
211 {
212 /* Poll the timers roughly once every 512 TB executions. */
213 uint64_t const cTbExecNative = pVCpu->iem.s.cTbExecNative;
214 if ( RT_LIKELY(cTbExecNative & 511)
215 || !TMTimerPollBoolWith32BitMilliTS(pVCpu->CTX_SUFF(pVM), pVCpu, &pVCpu->iem.s.msRecompilerPollNow) )
216 {
217 /*
218 * Success. Update statistics and switch to the next TB.
219 */
220 pVCpu->iem.s.cTbExecNative = cTbExecNative + 1;
221 if (a_fWithIrqCheck)
222 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
223 else
224 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
225
226 pNewTb->cUsed += 1;
227 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
228 pVCpu->iem.s.pCurTbR3 = pNewTb;
229 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
230 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
231 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
232 return (uintptr_t)pNewTb->Native.paInstructions;
233 }
234 }
235 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
236 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
237 }
238 else
239 {
240 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
241 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
242 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
243 }
244 }
245 else
246 {
247 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
248 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
249 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
250 }
251 }
252 else
253 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
254#else
255 NOREF(GCPhysPc);
256#endif
257
258 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
259 return 0;
260}
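
/*
 * Rough sketch of the return protocol for the two lookup helpers (this one and
 * the TLB variant below), written as plain C for illustration: the emitted TB
 * epilogue either tail-jumps to the returned native code address or, on zero,
 * exits back to the execution loop.  The function and variable names in the
 * sketch are hypothetical and not part of this file.
 */
#if 0 /* Illustration only - never compiled. */
static void iemNativeLookupReturnProtocolSketch(PVMCPUCC pVCpu, uint8_t idxTbLookup, uint32_t fFlags, RTGCPHYS GCPhysPc)
{
    uintptr_t const uNextNative = iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/>(pVCpu, idxTbLookup,
                                                                                              fFlags, GCPhysPc);
    if (uNextNative)
    {
        /* Tail-jump to the next TB's native code (pNewTb->Native.paInstructions). */
    }
    else
    {
        /* No linkable TB (or IRQ/FF/timer work pending): return to the execution loop. */
    }
}
#endif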
261
262
263/**
264 * Used by TB code to look up the next TB in the TB lookup table and link directly to it, performing the code TLB lookup for the current flat PC itself (in contrast to the variant above, which is handed GCPhysPc).
265 */
266template <bool const a_fWithIrqCheck>
267IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
268{
269 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
270 Assert(idxTbLookup < pTb->cTbLookupEntries);
271 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
272#if 1
273 PIEMTB const pNewTb = *ppNewTb;
274 if (pNewTb)
275 {
276 /*
277 * Calculate the flags for the next TB and check if they match.
278 */
279 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
280 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
281 { /* likely */ }
282 else
283 {
284 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
285 fFlags |= IEMTB_F_INHIBIT_SHADOW;
286 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
287 fFlags |= IEMTB_F_INHIBIT_NMI;
288 }
289 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
290 {
291 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
292 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
293 { /* likely */ }
294 else
295 fFlags |= IEMTB_F_CS_LIM_CHECKS;
296 }
297 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
298
299 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
300 {
301 /*
302 * Do the TLB lookup for flat RIP and compare the result with the next TB.
303 *
304 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
305 */
306 /* Calc the effective PC. */
307 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
308 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
309 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
310
311 /* Advance within the current buffer (PAGE) when possible. */
312 RTGCPHYS GCPhysPc;
313 uint64_t off;
314 if ( pVCpu->iem.s.pbInstrBuf
315 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
316 {
317 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
318 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
319 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
320 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
321 else
322 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
323 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
324 }
325 else
326 {
327 pVCpu->iem.s.pbInstrBuf = NULL;
328 pVCpu->iem.s.offCurInstrStart = 0;
329 pVCpu->iem.s.offInstrNextByte = 0;
330 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
331 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
332 }
333
334 if (pNewTb->GCPhysPc == GCPhysPc)
335 {
336 /*
337 * Check for interrupts and stuff.
338 */
339 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
340 * The main problems are the statistics and to some degree the logging. :/ */
341 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
342 {
343 /* Poll the timers roughly once every 512 TB executions. */
344 uint64_t const cTbExecNative = pVCpu->iem.s.cTbExecNative;
345 if ( RT_LIKELY(cTbExecNative & 511)
346 || !TMTimerPollBoolWith32BitMilliTS(pVCpu->CTX_SUFF(pVM), pVCpu, &pVCpu->iem.s.msRecompilerPollNow) )
347 {
348 /*
349 * Success. Update statistics and switch to the next TB.
350 */
351 pVCpu->iem.s.cTbExecNative = cTbExecNative + 1;
352 if (a_fWithIrqCheck)
353 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
354 else
355 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
356
357 pNewTb->cUsed += 1;
358 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
359 pVCpu->iem.s.pCurTbR3 = pNewTb;
360 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
361 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
362 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
363 return (uintptr_t)pNewTb->Native.paInstructions;
364 }
365 }
366 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
367 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
368 }
369 else
370 {
371 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
372 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
373 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
374 }
375 }
376 else
377 {
378 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
379 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
380 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
381 }
382 }
383 else
384 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
385#else
386 NOREF(fFlags);
387 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
388#endif
389
390 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
391 return 0;
392}
393
394
395/**
396 * Used by TB code when it wants to raise a \#DE.
397 */
398IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
399{
400 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
401 iemRaiseDivideErrorJmp(pVCpu);
402#ifndef _MSC_VER
403 return VINF_IEM_RAISED_XCPT; /* not reached */
404#endif
405}
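
/*
 * Note: the exception raising helpers in this group all follow the same shape:
 * bump the corresponding TB-exit statistic, then longjmp out of the TB via the
 * iemRaiseXxxJmp worker, so they never actually return.  The trailing return
 * statement only keeps non-MSC compilers happy; the MSC build omits it,
 * presumably because that compiler already treats the workers as non-returning
 * and would flag the statement as unreachable.
 */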
406
407
408/**
409 * Used by TB code when it wants to raise a \#UD.
410 */
411IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
412{
413 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
414 iemRaiseUndefinedOpcodeJmp(pVCpu);
415#ifndef _MSC_VER
416 return VINF_IEM_RAISED_XCPT; /* not reached */
417#endif
418}
419
420
421/**
422 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
423 *
424 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
425 */
426IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
427{
428 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
429 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
430 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
431 iemRaiseUndefinedOpcodeJmp(pVCpu);
432 else
433 iemRaiseDeviceNotAvailableJmp(pVCpu);
434#ifndef _MSC_VER
435 return VINF_IEM_RAISED_XCPT; /* not reached */
436#endif
437}
438
439
440/**
441 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
442 *
443 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
444 */
445IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
446{
447 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
448 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
449 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
450 iemRaiseUndefinedOpcodeJmp(pVCpu);
451 else
452 iemRaiseDeviceNotAvailableJmp(pVCpu);
453#ifndef _MSC_VER
454 return VINF_IEM_RAISED_XCPT; /* not reached */
455#endif
456}
457
458
459/**
460 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
461 *
462 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
463 */
464IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
465{
466 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
467 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
468 iemRaiseSimdFpExceptionJmp(pVCpu);
469 else
470 iemRaiseUndefinedOpcodeJmp(pVCpu);
471#ifndef _MSC_VER
472 return VINF_IEM_RAISED_XCPT; /* not reached */
473#endif
474}
475
476
477/**
478 * Used by TB code when it wants to raise a \#NM.
479 */
480IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
481{
482 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
483 iemRaiseDeviceNotAvailableJmp(pVCpu);
484#ifndef _MSC_VER
485 return VINF_IEM_RAISED_XCPT; /* not reached */
486#endif
487}
488
489
490/**
491 * Used by TB code when it wants to raise a \#GP(0).
492 */
493IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
494{
495 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
496 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
497#ifndef _MSC_VER
498 return VINF_IEM_RAISED_XCPT; /* not reached */
499#endif
500}
501
502
503/**
504 * Used by TB code when it wants to raise a \#MF.
505 */
506IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
507{
508 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
509 iemRaiseMathFaultJmp(pVCpu);
510#ifndef _MSC_VER
511 return VINF_IEM_RAISED_XCPT; /* not reached */
512#endif
513}
514
515
516/**
517 * Used by TB code when it wants to raise a \#XF.
518 */
519IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
520{
521 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
522 iemRaiseSimdFpExceptionJmp(pVCpu);
523#ifndef _MSC_VER
524 return VINF_IEM_RAISED_XCPT; /* not reached */
525#endif
526}
527
528
529/**
530 * Used by TB code when detecting opcode changes.
531 * @see iemThreadeFuncWorkerObsoleteTb
532 */
533IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
534{
535 /* We set fSafeToFree to false because we're being called in the context
536 of a TB callback function, which for native TBs means we cannot release
537 the executable memory till we've returned our way back to iemTbExec, as
538 that return path goes via the native code generated for the TB. */
539 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
540 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
541 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
542 return VINF_IEM_REEXEC_BREAK;
543}
544
545
546/**
547 * Used by TB code when we need to switch to a TB with CS.LIM checking.
548 */
549IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
550{
551 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
552 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
553 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
554 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
555 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
556 return VINF_IEM_REEXEC_BREAK;
557}
558
559
560/**
561 * Used by TB code when we missed a PC check after a branch.
562 */
563IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
564{
565 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
566 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
567 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
568 pVCpu->iem.s.pbInstrBuf));
569 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
570 return VINF_IEM_REEXEC_BREAK;
571}
572
573
574
575/*********************************************************************************************************************************
576* Helpers: Segmented memory fetches and stores. *
577*********************************************************************************************************************************/
578
579/**
580 * Used by TB code to load unsigned 8-bit data w/ segmentation.
581 */
582IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
583{
584#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
585 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
586#else
587 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
588#endif
589}
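
/*
 * Note: the IEMNATIVE_WITH_TLB_LOOKUP_FETCH/STORE/PUSH/POP/MAPPED selection
 * used throughout this and the following helper sections picks the xxxSafeJmp
 * workers when the recompiler emits its own inline TLB lookup, since these
 * helpers then only serve as the out-of-line fallback path; without the inline
 * lookup the full xxxJmp workers (which do the TLB lookup themselves) are used
 * instead.
 */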
590
591
592/**
593 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
594 * to 16 bits.
595 */
596IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
597{
598#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
599 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
600#else
601 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
602#endif
603}
604
605
606/**
607 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
608 * to 32 bits.
609 */
610IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
611{
612#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
613 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
614#else
615 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
616#endif
617}
618
619/**
620 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
621 * to 64 bits.
622 */
623IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
624{
625#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
626 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
627#else
628 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
629#endif
630}
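
/*
 * Worked example of the sign-extending cast chains above, as a self-contained
 * sketch with plain integer types (the function name is hypothetical and the
 * block is never compiled): fetching the byte 0x80 (-128) yields the following
 * 64-bit return values for the three widths.
 */
#if 0 /* Illustration only - never compiled. */
static void iemNativeSignExtendCastSketch(void)
{
    uint8_t const  bFetched = 0x80;
    uint64_t const uSx16    = (uint64_t)(uint16_t)(int16_t)(int8_t)bFetched; /* 0x000000000000ff80 */
    uint64_t const uSx32    = (uint64_t)(uint32_t)(int32_t)(int8_t)bFetched; /* 0x00000000ffffff80 */
    uint64_t const uSx64    = (uint64_t)(int64_t)(int8_t)bFetched;           /* 0xffffffffffffff80 */
    NOREF(uSx16); NOREF(uSx32); NOREF(uSx64);
}
#endif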
631
632
633/**
634 * Used by TB code to load unsigned 16-bit data w/ segmentation.
635 */
636IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
637{
638#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
639 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
640#else
641 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
642#endif
643}
644
645
646/**
647 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
648 * to 32 bits.
649 */
650IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
651{
652#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
653 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
654#else
655 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
656#endif
657}
658
659
660/**
661 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
662 * to 64 bits.
663 */
664IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
665{
666#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
667 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
668#else
669 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
670#endif
671}
672
673
674/**
675 * Used by TB code to load unsigned 32-bit data w/ segmentation.
676 */
677IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
678{
679#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
680 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
681#else
682 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
683#endif
684}
685
686
687/**
688 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
689 * to 64 bits.
690 */
691IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
692{
693#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
694 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
695#else
696 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
697#endif
698}
699
700
701/**
702 * Used by TB code to load unsigned 64-bit data w/ segmentation.
703 */
704IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
705{
706#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
707 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
708#else
709 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
710#endif
711}
712
713
714#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
715/**
716 * Used by TB code to load 128-bit data w/ segmentation.
717 */
718IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
719{
720#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
721 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
722#else
723 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
724#endif
725}
726
727
728/**
729 * Used by TB code to load 128-bit data w/ segmentation.
730 */
731IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
732{
733#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
734 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
735#else
736 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
737#endif
738}
739
740
741/**
742 * Used by TB code to load 128-bit data w/ segmentation.
743 */
744IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
745{
746#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
747 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
748#else
749 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
750#endif
751}
752
753
754/**
755 * Used by TB code to load 256-bit data w/ segmentation.
756 */
757IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
758{
759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
760 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
761#else
762 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
763#endif
764}
765
766
767/**
768 * Used by TB code to load 256-bit data w/ segmentation.
769 */
770IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
771{
772#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
773 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
774#else
775 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
776#endif
777}
778#endif
779
780
781/**
782 * Used by TB code to store unsigned 8-bit data w/ segmentation.
783 */
784IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
785{
786#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
787 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
788#else
789 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
790#endif
791}
792
793
794/**
795 * Used by TB code to store unsigned 16-bit data w/ segmentation.
796 */
797IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
798{
799#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
800 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
801#else
802 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
803#endif
804}
805
806
807/**
808 * Used by TB code to store unsigned 32-bit data w/ segmentation.
809 */
810IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
811{
812#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
813 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
814#else
815 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
816#endif
817}
818
819
820/**
821 * Used by TB code to store unsigned 64-bit data w/ segmentation.
822 */
823IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
824{
825#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
826 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
827#else
828 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
829#endif
830}
831
832
833#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
834/**
835 * Used by TB code to store unsigned 128-bit data w/ segmentation.
836 */
837IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
838{
839#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
840 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
841#else
842 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
843#endif
844}
845
846
847/**
848 * Used by TB code to store unsigned 128-bit data w/ segmentation.
849 */
850IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
851{
852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
853 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
854#else
855 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
856#endif
857}
858
859
860/**
861 * Used by TB code to store unsigned 256-bit data w/ segmentation.
862 */
863IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
864{
865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
866 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
867#else
868 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
869#endif
870}
871
872
873/**
874 * Used by TB code to store unsigned 256-bit data w/ segmentation.
875 */
876IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
877{
878#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
879 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
880#else
881 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
882#endif
883}
884#endif
885
886
887
888/**
889 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
890 */
891IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
892{
893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
894 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
895#else
896 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
897#endif
898}
899
900
901/**
902 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
903 */
904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
905{
906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
907 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
908#else
909 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
910#endif
911}
912
913
914/**
915 * Used by TB code to store an 32-bit selector value onto a generic stack.
916 *
917 * Intel CPUs doesn't do write a whole dword, thus the special function.
918 */
919IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
920{
921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
922 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
923#else
924 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
925#endif
926}
927
928
929/**
930 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
931 */
932IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
933{
934#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
935 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
936#else
937 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
938#endif
939}
940
941
942/**
943 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
944 */
945IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
946{
947#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
948 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
949#else
950 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
951#endif
952}
953
954
955/**
956 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
957 */
958IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
959{
960#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
961 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
962#else
963 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
964#endif
965}
966
967
968/**
969 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
970 */
971IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
972{
973#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
974 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
975#else
976 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
977#endif
978}
979
980
981
982/*********************************************************************************************************************************
983* Helpers: Flat memory fetches and stores. *
984*********************************************************************************************************************************/
985
986/**
987 * Used by TB code to load unsigned 8-bit data w/ flat address.
988 * @note Zero extending the value to 64-bit to simplify assembly.
989 */
990IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
991{
992#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
993 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
994#else
995 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
996#endif
997}
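
/*
 * Note: the flat helpers in this section reuse the segmented xxxSafeJmp
 * workers by passing UINT8_MAX as the segment register index, which the
 * workers treat as flat (no segment) addressing.
 */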
998
999
1000/**
1001 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1002 * to 16 bits.
1003 * @note Zero extending the value to 64-bit to simplify assembly.
1004 */
1005IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1006{
1007#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1008 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1009#else
1010 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1011#endif
1012}
1013
1014
1015/**
1016 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1017 * to 32 bits.
1018 * @note Zero extending the value to 64-bit to simplify assembly.
1019 */
1020IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1021{
1022#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1023 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1024#else
1025 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1026#endif
1027}
1028
1029
1030/**
1031 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1032 * to 64 bits.
1033 */
1034IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1035{
1036#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1037 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1038#else
1039 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1040#endif
1041}
1042
1043
1044/**
1045 * Used by TB code to load unsigned 16-bit data w/ flat address.
1046 * @note Zero extending the value to 64-bit to simplify assembly.
1047 */
1048IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1049{
1050#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1051 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1052#else
1053 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1054#endif
1055}
1056
1057
1058/**
1059 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1060 * to 32 bits.
1061 * @note Zero extending the value to 64-bit to simplify assembly.
1062 */
1063IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1064{
1065#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1066 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1067#else
1068 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1069#endif
1070}
1071
1072
1073/**
1074 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1075 * to 64 bits.
1076 * @note Zero extending the value to 64-bit to simplify assembly.
1077 */
1078IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1079{
1080#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1081 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1082#else
1083 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1084#endif
1085}
1086
1087
1088/**
1089 * Used by TB code to load unsigned 32-bit data w/ flat address.
1090 * @note Zero extending the value to 64-bit to simplify assembly.
1091 */
1092IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1093{
1094#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1095 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1096#else
1097 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1098#endif
1099}
1100
1101
1102/**
1103 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1104 * to 64 bits.
1105 * @note Zero extending the value to 64-bit to simplify assembly.
1106 */
1107IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1108{
1109#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1110 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1111#else
1112 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1113#endif
1114}
1115
1116
1117/**
1118 * Used by TB code to load unsigned 64-bit data w/ flat address.
1119 */
1120IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1121{
1122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1123 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1124#else
1125 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1126#endif
1127}
1128
1129
1130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1131/**
1132 * Used by TB code to load unsigned 128-bit data w/ flat address.
1133 */
1134IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1135{
1136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1137 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1138#else
1139 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1140#endif
1141}
1142
1143
1144/**
1145 * Used by TB code to load unsigned 128-bit data w/ flat address.
1146 */
1147IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1148{
1149#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1150 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1151#else
1152 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1153#endif
1154}
1155
1156
1157/**
1158 * Used by TB code to load unsigned 128-bit data w/ flat address.
1159 */
1160IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1161{
1162#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1163 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1164#else
1165 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1166#endif
1167}
1168
1169
1170/**
1171 * Used by TB code to load unsigned 256-bit data w/ flat address.
1172 */
1173IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1174{
1175#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1176 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1177#else
1178 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1179#endif
1180}
1181
1182
1183/**
1184 * Used by TB code to load unsigned 256-bit data w/ flat address.
1185 */
1186IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1187{
1188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1189 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1190#else
1191 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1192#endif
1193}
1194#endif
1195
1196
1197/**
1198 * Used by TB code to store unsigned 8-bit data w/ flat address.
1199 */
1200IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1201{
1202#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1203 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1204#else
1205 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1206#endif
1207}
1208
1209
1210/**
1211 * Used by TB code to store unsigned 16-bit data w/ flat address.
1212 */
1213IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1214{
1215#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1216 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1217#else
1218 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1219#endif
1220}
1221
1222
1223/**
1224 * Used by TB code to store unsigned 32-bit data w/ flat address.
1225 */
1226IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1227{
1228#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1229 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1230#else
1231 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1232#endif
1233}
1234
1235
1236/**
1237 * Used by TB code to store unsigned 64-bit data w/ flat address.
1238 */
1239IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1240{
1241#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1242 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1243#else
1244 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1245#endif
1246}
1247
1248
1249#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1250/**
1251 * Used by TB code to store unsigned 128-bit data w/ flat address.
1252 */
1253IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1254{
1255#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1256 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1257#else
1258 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1259#endif
1260}
1261
1262
1263/**
1264 * Used by TB code to store unsigned 128-bit data w/ flat address.
1265 */
1266IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1267{
1268#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1269 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1270#else
1271 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1272#endif
1273}
1274
1275
1276/**
1277 * Used by TB code to store unsigned 256-bit data w/ flat address.
1278 */
1279IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1280{
1281#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1282 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1283#else
1284 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1285#endif
1286}
1287
1288
1289/**
1290 * Used by TB code to store unsigned 256-bit data w/ flat address.
1291 */
1292IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1293{
1294#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1295 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1296#else
1297 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1298#endif
1299}
1300#endif
1301
1302
1303
1304/**
1305 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1306 */
1307IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1308{
1309#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1310 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1311#else
1312 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1313#endif
1314}
1315
1316
1317/**
1318 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1319 */
1320IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1321{
1322#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1323 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1324#else
1325 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1326#endif
1327}
1328
1329
1330/**
1331 * Used by TB code to store a segment selector value onto a flat stack.
1332 *
1334 * Intel CPUs don't write a whole dword, thus the special function.
1334 */
1335IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1336{
1337#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1338 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1339#else
1340 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1341#endif
1342}
1343
1344
1345/**
1346 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1347 */
1348IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1349{
1350#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1351 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1352#else
1353 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1354#endif
1355}
1356
1357
1358/**
1359 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1360 */
1361IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1362{
1363#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1364 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1365#else
1366 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1367#endif
1368}
1369
1370
1371/**
1372 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1373 */
1374IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1375{
1376#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1377 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1378#else
1379 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1380#endif
1381}
1382
1383
1384/**
1385 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1386 */
1387IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1388{
1389#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1390 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1391#else
1392 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1393#endif
1394}
1395
1396
1397
1398/*********************************************************************************************************************************
1399* Helpers: Segmented memory mapping. *
1400*********************************************************************************************************************************/
1401
1402/**
1403 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1404 * segmentation.
1405 */
1406IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1407 RTGCPTR GCPtrMem, uint8_t iSegReg))
1408{
1409#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1410 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1411#else
1412 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1413#endif
1414}
1415
1416
1417/**
1418 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1419 */
1420IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1421 RTGCPTR GCPtrMem, uint8_t iSegReg))
1422{
1423#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1424 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1425#else
1426 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1427#endif
1428}
1429
1430
1431/**
1432 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1433 */
1434IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1435 RTGCPTR GCPtrMem, uint8_t iSegReg))
1436{
1437#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1438 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1439#else
1440 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1441#endif
1442}
1443
1444
1445/**
1446 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1447 */
1448IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1449 RTGCPTR GCPtrMem, uint8_t iSegReg))
1450{
1451#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1452 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1453#else
1454 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1455#endif
1456}
1457
1458
1459/**
1460 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1461 * segmentation.
1462 */
1463IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1464 RTGCPTR GCPtrMem, uint8_t iSegReg))
1465{
1466#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1467 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1468#else
1469 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1470#endif
1471}
1472
1473
1474/**
1475 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1476 */
1477IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1478 RTGCPTR GCPtrMem, uint8_t iSegReg))
1479{
1480#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1481 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1482#else
1483 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1484#endif
1485}
1486
1487
1488/**
1489 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1490 */
1491IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1492 RTGCPTR GCPtrMem, uint8_t iSegReg))
1493{
1494#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1495 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1496#else
1497 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1498#endif
1499}
1500
1501
1502/**
1503 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1504 */
1505IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1506 RTGCPTR GCPtrMem, uint8_t iSegReg))
1507{
1508#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1509 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1510#else
1511 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1512#endif
1513}
1514
1515
1516/**
1517 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1518 * segmentation.
1519 */
1520IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1521 RTGCPTR GCPtrMem, uint8_t iSegReg))
1522{
1523#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1524 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1525#else
1526 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1527#endif
1528}
1529
1530
1531/**
1532 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1533 */
1534IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1535 RTGCPTR GCPtrMem, uint8_t iSegReg))
1536{
1537#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1538 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1539#else
1540 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1541#endif
1542}
1543
1544
1545/**
1546 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1547 */
1548IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1549 RTGCPTR GCPtrMem, uint8_t iSegReg))
1550{
1551#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1552 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1553#else
1554 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1555#endif
1556}
1557
1558
1559/**
1560 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1561 */
1562IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1563 RTGCPTR GCPtrMem, uint8_t iSegReg))
1564{
1565#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1566 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1567#else
1568 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1569#endif
1570}
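
/*
 * Rough usage sketch for the mapping helpers (illustration only, the function
 * name is hypothetical): the caller receives a host pointer plus an opaque
 * bUnmapInfo cookie, accesses guest memory through the pointer, and afterwards
 * hands the cookie to the matching commit-and-unmap helper (those are defined
 * further down in this file, outside this excerpt).
 */
#if 0 /* Illustration only - never compiled. */
static void iemNativeMapUsageSketch(PVMCPUCC pVCpu, RTGCPTR GCPtrMem)
{
    uint8_t   bUnmapInfo = 0;
    uint32_t *pu32       = iemNativeHlpMemMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem, X86_SREG_DS);
    *pu32 |= RT_BIT_32(0);   /* modify the guest dword through the host mapping */
    /* ...the corresponding commit-and-unmap helper would be called with bUnmapInfo here... */
}
#endif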
1571
1572
1573/**
1574 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1575 * segmentation.
1576 */
1577IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1578 RTGCPTR GCPtrMem, uint8_t iSegReg))
1579{
1580#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1581 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1582#else
1583 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1584#endif
1585}
1586
1587
1588/**
1589 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1590 */
1591IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1592 RTGCPTR GCPtrMem, uint8_t iSegReg))
1593{
1594#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1595 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1596#else
1597 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1598#endif
1599}
1600
1601
1602/**
1603 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1604 */
1605IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1606 RTGCPTR GCPtrMem, uint8_t iSegReg))
1607{
1608#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1609 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1610#else
1611 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1612#endif
1613}
1614
1615
1616/**
1617 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1618 */
1619IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1620 RTGCPTR GCPtrMem, uint8_t iSegReg))
1621{
1622#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1623 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1624#else
1625 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1626#endif
1627}
1628
1629
1630/**
1631 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1632 */
1633IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1634 RTGCPTR GCPtrMem, uint8_t iSegReg))
1635{
1636#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1637 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1638#else
1639 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1640#endif
1641}
1642
1643
1644/**
1645 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1646 */
1647IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1648 RTGCPTR GCPtrMem, uint8_t iSegReg))
1649{
1650#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1651 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1652#else
1653 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1654#endif
1655}
1656
1657
1658/**
1659 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1660 * segmentation.
1661 */
1662IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1663 RTGCPTR GCPtrMem, uint8_t iSegReg))
1664{
1665#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1666 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1667#else
1668 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1669#endif
1670}
1671
1672
1673/**
1674 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1675 */
1676IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1677 RTGCPTR GCPtrMem, uint8_t iSegReg))
1678{
1679#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1680 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1681#else
1682 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1683#endif
1684}
1685
1686
1687/**
1688 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1689 */
1690IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1691 RTGCPTR GCPtrMem, uint8_t iSegReg))
1692{
1693#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1694 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1695#else
1696 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1697#endif
1698}
1699
1700
1701/**
1702 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1703 */
1704IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1705 RTGCPTR GCPtrMem, uint8_t iSegReg))
1706{
1707#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1708 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1709#else
1710 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1711#endif
1712}
1713
1714
1715/*********************************************************************************************************************************
1716* Helpers: Flat memory mapping. *
1717*********************************************************************************************************************************/
1718
1719/**
1720 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1721 * address.
1722 */
1723IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1724{
1725#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1726 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1727#else
1728 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1729#endif
1730}
1731
1732
1733/**
1734 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1735 */
1736IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1737{
1738#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1739 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1740#else
1741 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1742#endif
1743}
1744
1745
1746/**
1747 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1748 */
1749IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1750{
1751#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1752 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1753#else
1754 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1755#endif
1756}
1757
1758
1759/**
1760 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1761 */
1762IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1763{
1764#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1765 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1766#else
1767 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1768#endif
1769}
1770
1771
1772/**
1773 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1774 * address.
1775 */
1776IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1777{
1778#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1779 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1780#else
1781 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1782#endif
1783}
1784
1785
1786/**
1787 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1788 */
1789IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1790{
1791#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1792 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1793#else
1794 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1795#endif
1796}
1797
1798
1799/**
1800 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1801 */
1802IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1803{
1804#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1805 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1806#else
1807 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1808#endif
1809}
1810
1811
1812/**
1813 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1814 */
1815IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1816{
1817#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1818 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1819#else
1820 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1821#endif
1822}
1823
1824
1825/**
1826 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1827 * address.
1828 */
1829IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1830{
1831#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1832 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1833#else
1834 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1835#endif
1836}
1837
1838
1839/**
1840 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1841 */
1842IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1843{
1844#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1845 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1846#else
1847 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1848#endif
1849}
1850
1851
1852/**
1853 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1854 */
1855IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1856{
1857#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1858 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1859#else
1860 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1861#endif
1862}
1863
1864
1865/**
1866 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1867 */
1868IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1869{
1870#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1871 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1872#else
1873 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1874#endif
1875}
1876
1877
1878/**
1879 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1880 * address.
1881 */
1882IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1883{
1884#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1885 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1886#else
1887 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1888#endif
1889}
1890
1891
1892/**
1893 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1894 */
1895IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1896{
1897#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1898 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1899#else
1900 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1901#endif
1902}
1903
1904
1905/**
1906 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1907 */
1908IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1909{
1910#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1911 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1912#else
1913 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1914#endif
1915}
1916
1917
1918/**
1919 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1920 */
1921IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1922{
1923#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1924 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1925#else
1926 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1927#endif
1928}
1929
1930
1931/**
1932 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1933 */
1934IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1935{
1936#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1937 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1938#else
1939 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1940#endif
1941}
1942
1943
1944/**
1945 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1946 */
1947IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1948{
1949#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1950 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1951#else
1952 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1953#endif
1954}
1955
1956
1957/**
1958 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1959 * address.
1960 */
1961IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1962{
1963#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1964 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1965#else
1966 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1967#endif
1968}
1969
1970
1971/**
1972 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1973 */
1974IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1975{
1976#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1977 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1978#else
1979 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1980#endif
1981}
1982
1983
1984/**
1985 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1986 */
1987IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1988{
1989#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1990 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1991#else
1992 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1993#endif
1994}
1995
1996
1997/**
1998 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1999 */
2000IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2001{
2002#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2003 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2004#else
2005 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2006#endif
2007}
2008
2009
2010/*********************************************************************************************************************************
2011* Helpers: Commit, rollback & unmap *
2012*********************************************************************************************************************************/
2013
2014/**
2015 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2016 */
2017IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2018{
2019 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2020}
2021
2022
2023/**
2024 * Used by TB code to commit and unmap a read-write memory mapping.
2025 */
2026IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2027{
2028 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2029}
2030
2031
2032/**
2033 * Used by TB code to commit and unmap a write-only memory mapping.
2034 */
2035IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2036{
2037 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2038}
2039
2040
2041/**
2042 * Used by TB code to commit and unmap a read-only memory mapping.
2043 */
2044IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2045{
2046 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2047}
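
/*
 * Illustrative sketch (not compiled): rough shape of the fallback path a
 * translation block takes through the helpers above for a segmented 32-bit
 * read-modify-write access.  The effective-address variable and the guest
 * memory update are assumptions made for the example; only the helper
 * signatures match the definitions in this file.
 */
#if 0
    uint8_t   bUnmapInfo = 0;
    uint32_t *pu32Dst    = iemNativeHlpMemMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrEff /* assumed */, X86_SREG_DS);
    *pu32Dst += 1;                                          /* the actual guest memory update */
    iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);     /* commit the write and release the mapping */
#endif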
2048
2049
2050/**
2051 * Reinitializes the native recompiler state.
2052 *
2053 * Called before starting a new recompile job.
2054 */
2055static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2056{
2057 pReNative->cLabels = 0;
2058 pReNative->bmLabelTypes = 0;
2059 pReNative->cFixups = 0;
2060#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2061 pReNative->cTbExitFixups = 0;
2062#endif
2063#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2064 pReNative->pDbgInfo->cEntries = 0;
2065 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2066#endif
2067 pReNative->pTbOrg = pTb;
2068 pReNative->cCondDepth = 0;
2069 pReNative->uCondSeqNo = 0;
2070 pReNative->uCheckIrqSeqNo = 0;
2071 pReNative->uTlbSeqNo = 0;
2072
2073#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2074 pReNative->Core.offPc = 0;
2075 pReNative->Core.cInstrPcUpdateSkipped = 0;
2076#endif
2077#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2078 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2079#endif
2080 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2081#if IEMNATIVE_HST_GREG_COUNT < 32
2082 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2083#endif
2084 ;
2085 pReNative->Core.bmHstRegsWithGstShadow = 0;
2086 pReNative->Core.bmGstRegShadows = 0;
2087#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2088 pReNative->Core.bmGstRegShadowDirty = 0;
2089#endif
2090 pReNative->Core.bmVars = 0;
2091 pReNative->Core.bmStack = 0;
2092 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2093 pReNative->Core.u64ArgVars = UINT64_MAX;
2094
2095 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 22);
2096 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2097 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2098 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2099 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2100 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2101 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2102 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2103 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2104 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2105 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2106 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2107 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2108 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2110 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2111 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2112 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2113 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2114 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2115 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2116 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2117 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2118
2119 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2120
2121 /* Full host register reinit: */
2122 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2123 {
2124 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2125 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2126 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2127 }
2128
2129 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2130 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2131#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2132 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2133#endif
2134#ifdef IEMNATIVE_REG_FIXED_TMP0
2135 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2136#endif
2137#ifdef IEMNATIVE_REG_FIXED_TMP1
2138 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2139#endif
2140#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2141 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2142#endif
2143 );
2144 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2145 {
2146 fRegs &= ~RT_BIT_32(idxReg);
2147 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2148 }
2149
2150 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2151#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2152 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2153#endif
2154#ifdef IEMNATIVE_REG_FIXED_TMP0
2155 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2156#endif
2157#ifdef IEMNATIVE_REG_FIXED_TMP1
2158 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2159#endif
2160#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2161 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2162#endif
2163
2164#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2165 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2166# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2167 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2168# endif
2169 ;
2170 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2171 pReNative->Core.bmGstSimdRegShadows = 0;
2172 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2173 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2174
2175 /* Full host register reinit: */
2176 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2177 {
2178 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2179 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2180 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2181 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2182 }
2183
2184 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2185 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2186 {
2187 fRegs &= ~RT_BIT_32(idxReg);
2188 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2189 }
2190
2191#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2192 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2193#endif
2194
2195#endif
2196
2197 return pReNative;
2198}
2199
2200
2201/**
2202 * Used when done emitting the per-chunk code and for iemNativeInit bailout.
2203 */
2204static void iemNativeTerm(PIEMRECOMPILERSTATE pReNative)
2205{
2206 RTMemFree(pReNative->pInstrBuf);
2207 RTMemFree(pReNative->paLabels);
2208 RTMemFree(pReNative->paFixups);
2209#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2210 RTMemFree(pReNative->paTbExitFixups);
2211#endif
2212#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2213 RTMemFree(pReNative->pDbgInfo);
2214#endif
2215 RTMemFree(pReNative);
2216}
2217
2218
2219/**
2220 * Allocates and initializes the native recompiler state.
2221 *
2222 * This is called the first time an EMT wants to recompile something.
2223 *
2224 * @returns Pointer to the new recompiler state.
2225 * @param pVCpu The cross context virtual CPU structure of the calling
2226 * thread.
2227 * @param pTb The TB that's about to be recompiled. When this is NULL,
2228 * the recompiler state is for emitting the common per-chunk
2229 * code from iemNativeRecompileAttachExecMemChunkCtx.
2230 * @thread EMT(pVCpu)
2231 */
2232static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2233{
2234 VMCPU_ASSERT_EMT(pVCpu);
2235
2236 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2237 AssertReturn(pReNative, NULL);
2238
2239 /*
2240 * Try allocate all the buffers and stuff we need.
2241 */
2242 uint32_t const cFactor = pTb ? 1 : 32 /* per-chunk stuff doesn't really need anything but the code buffer */;
2243 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2244 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K / cFactor);
2245 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K / cFactor);
2246#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2247 pReNative->paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemAllocZ(sizeof(IEMNATIVEEXITFIXUP) * _8K / cFactor);
2248#endif
2249#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2250 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K / cFactor]));
2251#endif
2252 if (RT_LIKELY( pReNative->pInstrBuf
2253 && pReNative->paLabels
2254 && pReNative->paFixups)
2255#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2256 && pReNative->paTbExitFixups
2257#endif
2258#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2259 && pReNative->pDbgInfo
2260#endif
2261 )
2262 {
2263 /*
2264 * Set the buffer & array sizes on success.
2265 */
2266 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2267 pReNative->cLabelsAlloc = _8K / cFactor;
2268 pReNative->cFixupsAlloc = _16K / cFactor;
2269#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2270 pReNative->cTbExitFixupsAlloc = _8K / cFactor;
2271#endif
2272#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2273 pReNative->cDbgInfoAlloc = _16K / cFactor;
2274#endif
2275
2276 /* Other constant stuff: */
2277 pReNative->pVCpu = pVCpu;
2278
2279 /*
2280 * Done, just reinit it.
2281 */
2282 return iemNativeReInit(pReNative, pTb);
2283 }
2284
2285 /*
2286 * Failed. Cleanup and return.
2287 */
2288 AssertFailed();
2289 iemNativeTerm(pReNative);
2290 return NULL;
2291}
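
/*
 * Illustrative sketch (not compiled): the lifecycle suggested by the three
 * functions above - allocate the state once per EMT, reinitialize it for each
 * translation block, and free it again on shutdown or bailout.  The caller-side
 * variable names and error handling are assumptions made for the example.
 */
#if 0
    PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, pTb);  /* first recompile job on this EMT */
    AssertReturn(pReNative, VERR_NO_MEMORY);
    /* ... recompile pTb ... */
    pReNative = iemNativeReInit(pReNative, pTbNext);            /* reuse the state for the next TB */
    /* ... recompile pTbNext ... */
    iemNativeTerm(pReNative);                                   /* only when tearing down or bailing out */
#endif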
2292
2293
2294/**
2295 * Creates a label.
2296 *
2297 * If the label does not yet have a defined position,
2298 * call iemNativeLabelDefine() later to set it.
2299 *
2300 * @returns Label ID. Throws VBox status code on failure, so no need to check
2301 * the return value.
2302 * @param pReNative The native recompile state.
2303 * @param enmType The label type.
2304 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2305 * label is not yet defined (default).
2306 * @param uData Data associated with the label. Only applicable to
2307 * certain types of labels. Default is zero.
2308 */
2309DECL_HIDDEN_THROW(uint32_t)
2310iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2311 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2312{
2313 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2314#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
2315 Assert(enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2316#endif
2317
2318 /*
2319 * Locate existing label definition.
2320 *
2321 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2322 * and uData is zero.
2323 */
2324 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2325 uint32_t const cLabels = pReNative->cLabels;
2326 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2327#ifndef VBOX_STRICT
2328 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2329 && offWhere == UINT32_MAX
2330 && uData == 0
2331#endif
2332 )
2333 {
2334#ifndef VBOX_STRICT
2335 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2336 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2337 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2338 if (idxLabel < pReNative->cLabels)
2339 return idxLabel;
2340#else
2341 for (uint32_t i = 0; i < cLabels; i++)
2342 if ( paLabels[i].enmType == enmType
2343 && paLabels[i].uData == uData)
2344 {
2345 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2346 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2347 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2348 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2349 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2350 return i;
2351 }
2352 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2353 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2354#endif
2355 }
2356
2357 /*
2358 * Make sure we've got room for another label.
2359 */
2360 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2361 { /* likely */ }
2362 else
2363 {
2364 uint32_t cNew = pReNative->cLabelsAlloc;
2365 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2366 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2367 cNew *= 2;
2368 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* The IEMNATIVEFIXUP::idxLabel type restricts this. */
2369 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2370 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2371 pReNative->paLabels = paLabels;
2372 pReNative->cLabelsAlloc = cNew;
2373 }
2374
2375 /*
2376 * Define a new label.
2377 */
2378 paLabels[cLabels].off = offWhere;
2379 paLabels[cLabels].enmType = enmType;
2380 paLabels[cLabels].uData = uData;
2381 pReNative->cLabels = cLabels + 1;
2382
2383 Assert((unsigned)enmType < 64);
2384 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2385
2386 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2387 {
2388 Assert(uData == 0);
2389 pReNative->aidxUniqueLabels[enmType] = cLabels;
2390 }
2391
2392 if (offWhere != UINT32_MAX)
2393 {
2394#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2395 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2396 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2397#endif
2398 }
2399 return cLabels;
2400}
2401
2402
2403/**
2404 * Defines the location of an existing label.
2405 *
2406 * @param pReNative The native recompile state.
2407 * @param idxLabel The label to define.
2408 * @param offWhere The position.
2409 */
2410DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2411{
2412 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2413 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2414 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2415 pLabel->off = offWhere;
2416#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2417 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2418 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2419#endif
2420}
2421
2422
2423#if !defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) || !defined(RT_ARCH_AMD64)
2424/**
2425 * Looks up a label.
2426 *
2427 * @returns Label ID if found, UINT32_MAX if not.
2428 */
2429static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2430 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2431{
2432 Assert((unsigned)enmType < 64);
2433 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2434 {
2435 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2436 return pReNative->aidxUniqueLabels[enmType];
2437
2438 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2439 uint32_t const cLabels = pReNative->cLabels;
2440 for (uint32_t i = 0; i < cLabels; i++)
2441 if ( paLabels[i].enmType == enmType
2442 && paLabels[i].uData == uData
2443 && ( paLabels[i].off == offWhere
2444 || offWhere == UINT32_MAX
2445 || paLabels[i].off == UINT32_MAX))
2446 return i;
2447 }
2448 return UINT32_MAX;
2449}
2450#endif
2451
2452
2453/**
2454 * Adds a fixup.
2455 *
2456 * @throws VBox status code (int) on failure.
2457 * @param pReNative The native recompile state.
2458 * @param offWhere The instruction offset of the fixup location.
2459 * @param idxLabel The target label ID for the fixup.
2460 * @param enmType The fixup type.
2461 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2462 */
2463DECL_HIDDEN_THROW(void)
2464iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2465 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2466{
2467 Assert(idxLabel <= UINT16_MAX);
2468 Assert((unsigned)enmType <= UINT8_MAX);
2469#ifdef RT_ARCH_ARM64
2470 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2471 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2472 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2473#endif
2474
2475 /*
2476 * Make sure we've room.
2477 */
2478 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2479 uint32_t const cFixups = pReNative->cFixups;
2480 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2481 { /* likely */ }
2482 else
2483 {
2484 uint32_t cNew = pReNative->cFixupsAlloc;
2485 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2486 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2487 cNew *= 2;
2488 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2489 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2490 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2491 pReNative->paFixups = paFixups;
2492 pReNative->cFixupsAlloc = cNew;
2493 }
2494
2495 /*
2496 * Add the fixup.
2497 */
2498 paFixups[cFixups].off = offWhere;
2499 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2500 paFixups[cFixups].enmType = enmType;
2501 paFixups[cFixups].offAddend = offAddend;
2502 pReNative->cFixups = cFixups + 1;
2503}
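
/*
 * Illustrative sketch (not compiled): the usual forward-branch pattern built
 * from iemNativeLabelCreate, iemNativeAddFixup and iemNativeLabelDefine.  The
 * label type and the fixup type are placeholders picked for the example; the
 * real emitters pick a fixup type matching the branch encoding they produce.
 */
#if 0
    /* 1. Create the label without a position (offWhere = UINT32_MAX). */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType /* assumed */, UINT32_MAX, 0);

    /* 2. Emit the branch and register a fixup against the label. */
    iemNativeAddFixup(pReNative, off /* offset of the emitted branch */, idxLabel,
                      kIemNativeFixupType_RelImm14At5 /* example only */, 0);

    /* 3. Once the target position is known, pin the label down. */
    iemNativeLabelDefine(pReNative, idxLabel, off);
#endif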
2504
2505
2506#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2507/**
2508 * Adds a fixup to the per chunk tail code.
2509 *
2510 * @throws VBox status code (int) on failure.
2511 * @param pReNative The native recompile state.
2512 * @param offWhere The instruction offset of the fixup location.
2513 * @param enmExitReason The exit reason to jump to.
2514 */
2515DECL_HIDDEN_THROW(void)
2516iemNativeAddTbExitFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, IEMNATIVELABELTYPE enmExitReason)
2517{
2518 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
2519
2520 /*
2521 * Make sure we've room.
2522 */
2523 PIEMNATIVEEXITFIXUP paTbExitFixups = pReNative->paTbExitFixups;
2524 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
2525 if (RT_LIKELY(cTbExitFixups < pReNative->cTbExitFixupsAlloc))
2526 { /* likely */ }
2527 else
2528 {
2529 uint32_t cNew = pReNative->cTbExitFixupsAlloc;
2530 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2531 AssertStmt(cTbExitFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2532 cNew *= 2;
2533 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2534 paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemRealloc(paTbExitFixups, cNew * sizeof(paTbExitFixups[0]));
2535 AssertStmt(paTbExitFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2536 pReNative->paTbExitFixups = paTbExitFixups;
2537 pReNative->cTbExitFixupsAlloc = cNew;
2538 }
2539
2540 /*
2541 * Add the fixup.
2542 */
2543 paTbExitFixups[cTbExitFixups].off = offWhere;
2544 paTbExitFixups[cTbExitFixups].enmExitReason = enmExitReason;
2545 pReNative->cTbExitFixups = cTbExitFixups + 1;
2546}
2547#endif
2548
2549
2550/**
2551 * Slow code path for iemNativeInstrBufEnsure.
2552 */
2553DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2554{
2555 /* Double the buffer size till we meet the request. */
2556 uint32_t cNew = pReNative->cInstrBufAlloc;
2557 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2558 do
2559 cNew *= 2;
2560 while (cNew < off + cInstrReq);
2561
2562 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2563#ifdef RT_ARCH_ARM64
2564 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2565#else
2566 uint32_t const cbMaxInstrBuf = _2M;
2567#endif
2568 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2569
2570 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2571 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2572
2573#ifdef VBOX_STRICT
2574 pReNative->offInstrBufChecked = off + cInstrReq;
2575#endif
2576 pReNative->cInstrBufAlloc = cNew;
2577 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2578}
2579
2580#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2581
2582/**
2583 * Grows the static debug info array used during recompilation.
2584 *
2585 * @returns Pointer to the new debug info block; throws VBox status code on
2586 * failure, so no need to check the return value.
2587 */
2588DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2589{
2590 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2591 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2592 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2593 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2594 pReNative->pDbgInfo = pDbgInfo;
2595 pReNative->cDbgInfoAlloc = cNew;
2596 return pDbgInfo;
2597}
2598
2599
2600/**
2601 * Adds a new, uninitialized debug info entry, returning the pointer to it.
2602 */
2603DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2604{
2605 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2606 { /* likely */ }
2607 else
2608 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2609 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2610}
2611
2612
2613/**
2614 * Debug Info: Adds a native offset record, if necessary.
2615 */
2616DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2617{
2618 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2619
2620 /*
2621 * Do we need this one?
2622 */
2623 uint32_t const offPrev = pDbgInfo->offNativeLast;
2624 if (offPrev == off)
2625 return;
2626 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2627
2628 /*
2629 * Add it.
2630 */
2631 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2632 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2633 pEntry->NativeOffset.offNative = off;
2634 pDbgInfo->offNativeLast = off;
2635}
2636
2637
2638/**
2639 * Debug Info: Record info about a label.
2640 */
2641static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2642{
2643 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2644 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2645 pEntry->Label.uUnused = 0;
2646 pEntry->Label.enmLabel = (uint8_t)enmType;
2647 pEntry->Label.uData = uData;
2648}
2649
2650
2651/**
2652 * Debug Info: Record info about a threaded call.
2653 */
2654static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2655{
2656 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2657 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2658 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2659 pEntry->ThreadedCall.uUnused = 0;
2660 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2661}
2662
2663
2664/**
2665 * Debug Info: Record info about a new guest instruction.
2666 */
2667static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2668{
2669 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2670 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2671 pEntry->GuestInstruction.uUnused = 0;
2672 pEntry->GuestInstruction.fExec = fExec;
2673}
2674
2675
2676/**
2677 * Debug Info: Record info about guest register shadowing.
2678 */
2679DECL_HIDDEN_THROW(void)
2680iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2681 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2682{
2683 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2684 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2685 pEntry->GuestRegShadowing.uUnused = 0;
2686 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2687 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2688 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2689#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2690 Assert( idxHstReg != UINT8_MAX
2691 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2692#endif
2693}
2694
2695
2696# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2697/**
2698 * Debug Info: Record info about guest register shadowing.
2699 */
2700DECL_HIDDEN_THROW(void)
2701iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2702 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2703{
2704 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2705 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2706 pEntry->GuestSimdRegShadowing.uUnused = 0;
2707 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2708 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2709 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2710}
2711# endif
2712
2713
2714# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2715/**
2716 * Debug Info: Record info about delayed RIP updates.
2717 */
2718DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
2719{
2720 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2721 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2722 pEntry->DelayedPcUpdate.offPc = offPc;
2723 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2724}
2725# endif
2726
2727# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2728
2729/**
2730 * Debug Info: Record info about a dirty guest register.
2731 */
2732DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2733 uint8_t idxGstReg, uint8_t idxHstReg)
2734{
2735 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2736 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2737 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2738 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2739 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2740}
2741
2742
2743/**
2744 * Debug Info: Record info about a dirty guest register writeback operation.
2745 */
2746DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2747{
2748 unsigned const cBitsGstRegMask = 25;
2749 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2750
2751 /* The first block of 25 bits: */
2752 if (fGstReg & fGstRegMask)
2753 {
2754 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2755 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2756 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2757 pEntry->GuestRegWriteback.cShift = 0;
2758 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2759 fGstReg &= ~(uint64_t)fGstRegMask;
2760 if (!fGstReg)
2761 return;
2762 }
2763
2764 /* The second block of 25 bits: */
2765 fGstReg >>= cBitsGstRegMask;
2766 if (fGstReg & fGstRegMask)
2767 {
2768 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2769 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2770 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2771 pEntry->GuestRegWriteback.cShift = 1;
2772 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2773 fGstReg &= ~(uint64_t)fGstRegMask;
2774 if (!fGstReg)
2775 return;
2776 }
2777
2778 /* The last block with 14 bits: */
2779 fGstReg >>= cBitsGstRegMask;
2780 Assert(fGstReg & fGstRegMask);
2781 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2782 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2783 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2784 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2785 pEntry->GuestRegWriteback.cShift = 2;
2786 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2787}
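
/*
 * Illustrative sketch (not compiled): how a debug info consumer could put the
 * 64-bit guest register mask back together from the up to three 25/25/14-bit
 * GuestRegWriteback entries emitted above.  A real consumer walks the entries
 * in sequence; the flat loop here is a simplification for the example.
 */
#if 0
    uint64_t fGstRegs = 0;
    for (uint32_t i = 0; i < pDbgInfo->cEntries; i++)
        if (pDbgInfo->aEntries[i].GuestRegWriteback.uType == kIemTbDbgEntryType_GuestRegWriteback)
            fGstRegs |= (uint64_t)pDbgInfo->aEntries[i].GuestRegWriteback.fGstReg
                      << (pDbgInfo->aEntries[i].GuestRegWriteback.cShift * 25);
#endif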
2788
2789# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2790
2791#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2792
2793
2794/*********************************************************************************************************************************
2795* Register Allocator *
2796*********************************************************************************************************************************/
2797
2798/**
2799 * Register parameter indexes (indexed by argument number).
2800 */
2801DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2802{
2803 IEMNATIVE_CALL_ARG0_GREG,
2804 IEMNATIVE_CALL_ARG1_GREG,
2805 IEMNATIVE_CALL_ARG2_GREG,
2806 IEMNATIVE_CALL_ARG3_GREG,
2807#if defined(IEMNATIVE_CALL_ARG4_GREG)
2808 IEMNATIVE_CALL_ARG4_GREG,
2809# if defined(IEMNATIVE_CALL_ARG5_GREG)
2810 IEMNATIVE_CALL_ARG5_GREG,
2811# if defined(IEMNATIVE_CALL_ARG6_GREG)
2812 IEMNATIVE_CALL_ARG6_GREG,
2813# if defined(IEMNATIVE_CALL_ARG7_GREG)
2814 IEMNATIVE_CALL_ARG7_GREG,
2815# endif
2816# endif
2817# endif
2818#endif
2819};
2820AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2821
2822/**
2823 * Call register masks indexed by argument count.
2824 */
2825DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2826{
2827 0,
2828 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2829 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2830 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2831 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2832 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2833#if defined(IEMNATIVE_CALL_ARG4_GREG)
2834 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2835 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2836# if defined(IEMNATIVE_CALL_ARG5_GREG)
2837 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2838 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2839# if defined(IEMNATIVE_CALL_ARG6_GREG)
2840 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2841 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2842 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2843# if defined(IEMNATIVE_CALL_ARG7_GREG)
2844 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2845 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2846 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2847# endif
2848# endif
2849# endif
2850#endif
2851};
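
/*
 * Illustrative sketch (not compiled): the table above yields the mask of
 * argument registers a call with a given argument count will need, e.g. for a
 * three-argument helper call.  The flushing step is only paraphrased; the real
 * call emission code lives elsewhere.
 */
#if 0
    uint32_t const fArgRegs = g_afIemNativeCallRegs[3];     /* ARG0, ARG1 and ARG2 */
    Assert(fArgRegs == (  RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG)
                        | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
                        | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)));
    /* ... make sure no live variables or shadows remain in fArgRegs before loading the arguments ... */
#endif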
2852
2853#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2854/**
2855 * BP offset of the stack argument slots.
2856 *
2857 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2858 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2859 */
2860DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2861{
2862 IEMNATIVE_FP_OFF_STACK_ARG0,
2863# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2864 IEMNATIVE_FP_OFF_STACK_ARG1,
2865# endif
2866# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2867 IEMNATIVE_FP_OFF_STACK_ARG2,
2868# endif
2869# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2870 IEMNATIVE_FP_OFF_STACK_ARG3,
2871# endif
2872};
2873AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2874#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2875
2876/**
2877 * Info about shadowed guest register values.
2878 * @see IEMNATIVEGSTREG
2879 */
2880DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2881{
2882#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2883 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2884 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2885 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2886 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2887 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2888 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2889 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2890 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2891 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2892 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2893 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2894 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2895 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2896 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2897 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2898 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2899 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2900 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2901 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2902 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2903 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2904 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2905 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2906 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2907 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2908 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2909 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2910 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2911 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2912 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2913 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2914 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2915 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2916 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2917 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2918 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2919 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2920 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2921 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2922 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2923 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2924 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2925 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2926 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2927 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2928 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2929 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2930 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2931#undef CPUMCTX_OFF_AND_SIZE
2932};
2933AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
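
/*
 * Illustrative sketch (not compiled): what the table above provides for a
 * shadowed guest register - the CPUMCTX member offset, its size and a name for
 * logging.  The local variable names are assumptions made for the example.
 */
#if 0
    IEMNATIVEGSTREG const enmGstReg  = kIemNativeGstReg_Pc;
    uint32_t const        offCpumCtx = g_aGstShadowInfo[enmGstReg].off;  /* offset of cpum.GstCtx.rip within VMCPU */
    uint32_t const        cbValue    = g_aGstShadowInfo[enmGstReg].cb;   /* member size, drives the load/store width */
    Log12(("%s: %u bytes at %#x\n", g_aGstShadowInfo[enmGstReg].pszName, cbValue, offCpumCtx));
#endif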
2934
2935
2936/** Host CPU general purpose register names. */
2937DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2938{
2939#ifdef RT_ARCH_AMD64
2940 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2941#elif defined(RT_ARCH_ARM64)
2942 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2943 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2944#else
2945# error "port me"
2946#endif
2947};
2948
2949
2950#if 0 /* unused */
2951/**
2952 * Tries to locate a suitable register in the given register mask.
2953 *
2954 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2955 * failed.
2956 *
2957 * @returns Host register number on success, returns UINT8_MAX on failure.
2958 */
2959static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2960{
2961 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2962 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2963 if (fRegs)
2964 {
2965 /** @todo pick better here: */
2966 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2967
2968 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2969 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2970 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2971 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2972
2973 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2974 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2975 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2976 return idxReg;
2977 }
2978 return UINT8_MAX;
2979}
2980#endif /* unused */
2981
2982
2983#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2984/**
2985 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2986 *
2987 * @returns New code buffer offset on success, UINT32_MAX on failure.
2988 * @param pReNative The native recompile state.
2989 * @param off The current code buffer position.
2990 * @param enmGstReg The guest register to store to.
2991 * @param idxHstReg The host register to store from.
2992 */
2993DECL_FORCE_INLINE_THROW(uint32_t)
2994iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
2995{
2996 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2997 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
2998
2999 switch (g_aGstShadowInfo[enmGstReg].cb)
3000 {
3001 case sizeof(uint64_t):
3002 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3003 case sizeof(uint32_t):
3004 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3005 case sizeof(uint16_t):
3006 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3007#if 0 /* not present in the table. */
3008 case sizeof(uint8_t):
3009 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3010#endif
3011 default:
3012 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3013 }
3014}
3015
3016
3017/**
3018 * Emits code to flush a pending write of the given guest register if any.
3019 *
3020 * @returns New code buffer offset.
3021 * @param pReNative The native recompile state.
3022 * @param off Current code buffer position.
3023 * @param enmGstReg The guest register to flush.
3024 */
3025DECL_HIDDEN_THROW(uint32_t)
3026iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
3027{
3028 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3029
3030 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3031 && enmGstReg <= kIemNativeGstReg_GprLast)
3032 || enmGstReg == kIemNativeGstReg_MxCsr);
3033 Assert( idxHstReg != UINT8_MAX
3034 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3035 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3036 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3037
3038 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3039
3040 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3041 return off;
3042}
3043
3044
3045/**
3046 * Flush the given set of guest registers if marked as dirty.
3047 *
3048 * @returns New code buffer offset.
3049 * @param pReNative The native recompile state.
3050 * @param off Current code buffer position.
3051 * @param fFlushGstReg The guest register set to flush (default is flush everything).
3052 */
3053DECL_HIDDEN_THROW(uint32_t)
3054iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
3055{
3056 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
3057 if (bmGstRegShadowDirty)
3058 {
3059# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3060 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3061 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
3062# endif
3063 do
3064 {
3065 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3066 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3067 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3068 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3069 } while (bmGstRegShadowDirty);
3070 }
3071
3072 return off;
3073}
3074
3075
3076/**
3077 * Flush all shadowed guest registers marked as dirty for the given host register.
3078 *
3079 * @returns New code buffer offset.
3080 * @param pReNative The native recompile state.
3081 * @param off Current code buffer position.
3082 * @param idxHstReg The host register.
3083 *
3084 * @note This doesn't do any unshadowing of guest registers from the host register.
3085 */
3086DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3087{
3088 /* We need to flush any pending guest register writes this host register shadows. */
3089 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3090 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3091 {
3092# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3093 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3094 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3095# endif
3096 /** @todo r=bird: This is a crap way of enumerating a bitmask where we're
3097 * likely to only have a single bit set. It'll be in the 0..15 range,
3098 * but still it's 15 unnecessary loops for the last guest register. */
3099
3100 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3101 do
3102 {
3103 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3104 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3105 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3106 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3107 } while (bmGstRegShadowDirty);
3108 }
3109
3110 return off;
3111}
3112#endif
3113
3114
3115/**
3116 * Locate a register, possibly freeing one up.
3117 *
3118 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3119 * failed.
3120 *
3121 * @returns Host register number on success. Returns UINT8_MAX if no registers
3122 * were found; the caller is supposed to deal with this and raise an
3123 * allocation-type-specific status code (if desired).
3124 *
3125 * @throws VBox status code if we run into trouble spilling a variable or
3126 * recording debug info. Does NOT throw anything if we're out of
3127 * registers, though.
3128 */
3129static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3130 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3131{
3132 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3133 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3134 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3135
3136 /*
3137 * Try a freed register that's shadowing a guest register.
3138 */
3139 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3140 if (fRegs)
3141 {
3142 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3143
3144#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3145 /*
3146          * When we have liveness information, we use it to kick out all shadowed
3147          * guest registers that will not be needed any more in this TB. If we're
3148 * lucky, this may prevent us from ending up here again.
3149 *
3150 * Note! We must consider the previous entry here so we don't free
3151 * anything that the current threaded function requires (current
3152 * entry is produced by the next threaded function).
3153 */
3154 uint32_t const idxCurCall = pReNative->idxCurCall;
3155 if (idxCurCall > 0)
3156 {
3157 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3158
3159# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3160 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3161 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3162 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
3163#else
3164 /* Construct a mask of the registers not in the read or write state.
3165               Note! We could skip writes, if they aren't from us, as this is just
3166 a hack to prevent trashing registers that have just been written
3167 or will be written when we retire the current instruction. */
3168 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3169 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3170 & IEMLIVENESSBIT_MASK;
3171#endif
3172 /* Merge EFLAGS. */
3173 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3174 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3175 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3176 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3177 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
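            /* The three shift-and-AND steps above AND together the seven
               consecutive liveness bits starting at kIemNativeGstReg_EFlags
               (the 'other' flags plus the six individually tracked ones), so
               the EFLAGS shadow is only marked freeable when every flag group
               is unreferenced. */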
3178
3179 /* If it matches any shadowed registers. */
3180 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3181 {
3182#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3183 /* Writeback any dirty shadow registers we are about to unshadow. */
3184 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3185#endif
3186
3187 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3188 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3189 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3190
3191 /* See if we've got any unshadowed registers we can return now. */
3192 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3193 if (fUnshadowedRegs)
3194 {
3195 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3196 return (fPreferVolatile
3197 ? ASMBitFirstSetU32(fUnshadowedRegs)
3198 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3199 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3200 - 1;
3201 }
3202 }
3203 }
3204#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3205
3206 unsigned const idxReg = (fPreferVolatile
3207 ? ASMBitFirstSetU32(fRegs)
3208 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3209 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3210 - 1;
3211
3212 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3213 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3214 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3215 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3216
3217#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3218 /* We need to flush any pending guest register writes this host register shadows. */
3219 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3220#endif
3221
3222 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3223 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3224 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3225 return idxReg;
3226 }
3227
3228 /*
3229      * Try to free up a variable that's in a register.
3230      *
3231      * We do two rounds here, first evacuating variables that don't need to be
3232      * saved on the stack, then in the second round moving things to the stack.
3233 */
3234 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3235 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3236 {
3237 uint32_t fVars = pReNative->Core.bmVars;
3238 while (fVars)
3239 {
3240 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3241 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3242#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3243 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
3244 continue;
3245#endif
3246
3247 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3248 && (RT_BIT_32(idxReg) & fRegMask)
3249 && ( iLoop == 0
3250 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3251 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3252 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3253 {
3254 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3255 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3256 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3257 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3258 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3259 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3260#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3261 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3262#endif
3263
3264 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3265 {
3266 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3267 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3268 }
3269
3270 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3271 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3272
3273 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3274 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3275 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3276 return idxReg;
3277 }
3278 fVars &= ~RT_BIT_32(idxVar);
3279 }
3280 }
3281
3282 return UINT8_MAX;
3283}
3284
3285
3286/**
3287 * Reassigns a variable to a different register specified by the caller.
3288 *
3289 * @returns The new code buffer position.
3290 * @param pReNative The native recompile state.
3291 * @param off The current code buffer position.
3292 * @param idxVar The variable index.
3293 * @param idxRegOld The old host register number.
3294 * @param idxRegNew The new host register number.
3295 * @param pszCaller The caller for logging.
3296 */
3297static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3298 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3299{
3300 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3301 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3302#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3303 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3304#endif
3305 RT_NOREF(pszCaller);
3306
3307#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3308 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3309#endif
3310 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3311
3312 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3313#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3314 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3315#endif
3316 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3317 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3318 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3319
3320 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3321 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3322 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3323 if (fGstRegShadows)
3324 {
3325 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3326 | RT_BIT_32(idxRegNew);
3327 while (fGstRegShadows)
3328 {
3329 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3330 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3331
3332 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3333 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3334 }
3335 }
3336
3337 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3338 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3339 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3340 return off;
3341}
3342
3343
3344/**
3345 * Moves a variable to a different register or spills it onto the stack.
3346 *
3347 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3348 * kinds can easily be recreated if needed later.
3349 *
3350 * @returns The new code buffer position.
3351 * @param pReNative The native recompile state.
3352 * @param off The current code buffer position.
3353 * @param idxVar The variable index.
3354 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3355 * call-volatile registers.
3356 */
3357DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3358 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3359{
3360 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3361 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3362 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3363 Assert(!pVar->fRegAcquired);
3364
3365 uint8_t const idxRegOld = pVar->idxReg;
3366 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3367 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3368 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3369 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3370 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3371 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3372 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3373 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3374#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3375 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3376#endif
3377
3378
3379 /** @todo Add statistics on this.*/
3380 /** @todo Implement basic variable liveness analysis (python) so variables
3381 * can be freed immediately once no longer used. This has the potential to
3382 * be trashing registers and stack for dead variables.
3383 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3384
3385 /*
3386 * First try move it to a different register, as that's cheaper.
3387 */
3388 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3389 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3390 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3391 if (fRegs)
3392 {
3393 /* Avoid using shadow registers, if possible. */
3394 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3395 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3396 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3397 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3398 }
3399
3400 /*
3401 * Otherwise we must spill the register onto the stack.
3402 */
3403 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3404 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3405 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3406 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3407
3408 pVar->idxReg = UINT8_MAX;
3409 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3410 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3411 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3412 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3413 return off;
3414}
3415
3416
3417/**
3418 * Allocates a temporary host general purpose register.
3419 *
3420 * This may emit code to save register content onto the stack in order to free
3421 * up a register.
3422 *
3423 * @returns The host register number; throws VBox status code on failure,
3424 * so no need to check the return value.
3425 * @param pReNative The native recompile state.
3426 * @param poff Pointer to the variable with the code buffer position.
3427 * This will be updated if we need to move a variable from
3428 * register to stack in order to satisfy the request.
3429 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3430 * registers (@c true, default) or the other way around
3431 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3432 */
3433DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3434{
3435 /*
3436      * Try to find a completely unused register, preferably a call-volatile one.
3437 */
3438 uint8_t idxReg;
3439 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3440 & ~pReNative->Core.bmHstRegsWithGstShadow
3441 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3442 if (fRegs)
3443 {
3444 if (fPreferVolatile)
3445 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3446 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3447 else
3448 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3449 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3450 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3451 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3452 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3453 }
3454 else
3455 {
3456 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3457 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3458 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3459 }
3460 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3461}
3462
3463
3464/**
3465 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3466 * registers.
3467 *
3468 * @returns The host register number; throws VBox status code on failure,
3469 * so no need to check the return value.
3470 * @param pReNative The native recompile state.
3471 * @param poff Pointer to the variable with the code buffer position.
3472 * This will be updated if we need to move a variable from
3473 * register to stack in order to satisfy the request.
3474 * @param fRegMask Mask of acceptable registers.
3475 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3476 * registers (@c true, default) or the other way around
3477 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3478 */
3479DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3480 bool fPreferVolatile /*= true*/)
3481{
3482 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3483 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3484
3485 /*
3486      * Try to find a completely unused register, preferably a call-volatile one.
3487 */
3488 uint8_t idxReg;
3489 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3490 & ~pReNative->Core.bmHstRegsWithGstShadow
3491 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3492 & fRegMask;
3493 if (fRegs)
3494 {
3495 if (fPreferVolatile)
3496 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3497 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3498 else
3499 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3500 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3501 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3502 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3503 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3504 }
3505 else
3506 {
3507 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3508 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3509 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3510 }
3511 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3512}
3513
3514
3515/**
3516 * Allocates a temporary register for loading an immediate value into.
3517 *
3518 * This will emit code to load the immediate, unless there happens to be an
3519 * unused register with the value already loaded.
3520 *
3521 * The caller must not modify the returned register; it is to be considered
3522 * read-only. Free it using iemNativeRegFreeTmpImm.
3523 *
3524 * @returns The host register number; throws VBox status code on failure, so no
3525 * need to check the return value.
3526 * @param pReNative The native recompile state.
3527 * @param poff Pointer to the variable with the code buffer position.
3528 * @param uImm The immediate value that the register must hold upon
3529 * return.
3530 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3531 * registers (@c true, default) or the other way around
3532 * (@c false).
3533 *
3534 * @note Reusing immediate values has not been implemented yet.
3535 */
3536DECL_HIDDEN_THROW(uint8_t)
3537iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3538{
3539 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3540 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3541 return idxReg;
3542}
3543
3544
3545/**
3546 * Allocates a temporary host general purpose register for keeping a guest
3547 * register value.
3548 *
3549 * Since we may already have a register holding the guest register value,
3550 * code will be emitted to do the loading if that's not the case. Code may also
3551 * be emitted if we have to free up a register to satisfy the request.
3552 *
3553 * @returns The host register number; throws VBox status code on failure, so no
3554 * need to check the return value.
3555 * @param pReNative The native recompile state.
3556 * @param poff Pointer to the variable with the code buffer
3557 * position. This will be updated if we need to move a
3558 * variable from register to stack in order to satisfy
3559 * the request.
3560 * @param enmGstReg The guest register that is to be updated.
3561 * @param enmIntendedUse How the caller will be using the host register.
3562 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3563 * register is okay (default). The ASSUMPTION here is
3564 * that the caller has already flushed all volatile
3565 * registers, so this is only applied if we allocate a
3566 * new register.
3567 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3568 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3569 */
3570DECL_HIDDEN_THROW(uint8_t)
3571iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3572 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3573 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3574{
3575 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3576#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3577 AssertMsg( fSkipLivenessAssert
3578 || pReNative->idxCurCall == 0
3579 || enmGstReg == kIemNativeGstReg_Pc
3580 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3581 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3582 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3583 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3584 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3585 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3586#endif
3587 RT_NOREF(fSkipLivenessAssert);
3588#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3589 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3590#endif
3591 uint32_t const fRegMask = !fNoVolatileRegs
3592 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3593 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3594
3595 /*
3596 * First check if the guest register value is already in a host register.
3597 */
3598 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3599 {
3600 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3601 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3602 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3603 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3604
3605 /* It's not supposed to be allocated... */
3606 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3607 {
3608 /*
3609              * If the register will trash the guest shadow copy, try to find a
3610 * completely unused register we can use instead. If that fails,
3611 * we need to disassociate the host reg from the guest reg.
3612 */
3613 /** @todo would be nice to know if preserving the register is in any way helpful. */
3614            /* If the purpose is calculations, try to duplicate the register value as
3615 we'll be clobbering the shadow. */
3616 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3617 && ( ~pReNative->Core.bmHstRegs
3618 & ~pReNative->Core.bmHstRegsWithGstShadow
3619 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3620 {
3621 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3622
3623 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3624
3625 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3626 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3627 g_apszIemNativeHstRegNames[idxRegNew]));
3628 idxReg = idxRegNew;
3629 }
3630 /* If the current register matches the restrictions, go ahead and allocate
3631 it for the caller. */
3632 else if (fRegMask & RT_BIT_32(idxReg))
3633 {
3634 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3635 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3636 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3637 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3638 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3639 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3640 else
3641 {
3642 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3643 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3644 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3645 }
3646 }
3647 /* Otherwise, allocate a register that satisfies the caller and transfer
3648 the shadowing if compatible with the intended use. (This basically
3649               means the caller wants a non-volatile register (RSP push/pop scenario).) */
3650 else
3651 {
3652 Assert(fNoVolatileRegs);
3653 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3654 !fNoVolatileRegs
3655 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3656 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3657 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3658 {
3659 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3660                     Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3661 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3662 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3663 }
3664 else
3665 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3666 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3667 g_apszIemNativeHstRegNames[idxRegNew]));
3668 idxReg = idxRegNew;
3669 }
3670 }
3671 else
3672 {
3673 /*
3674 * Oops. Shadowed guest register already allocated!
3675 *
3676 * Allocate a new register, copy the value and, if updating, the
3677 * guest shadow copy assignment to the new register.
3678 */
3679 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3680 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3681 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3682 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3683
3684 /** @todo share register for readonly access. */
3685 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3686 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3687
3688 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3689 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3690
3691 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3692 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3693 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3694 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3695 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3696 else
3697 {
3698 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3699 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3700 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3701 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3702 }
3703 idxReg = idxRegNew;
3704 }
3705 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3706
3707#ifdef VBOX_STRICT
3708 /* Strict builds: Check that the value is correct. */
3709 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3710#endif
3711
3712#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3713 /** @todo r=aeichner Implement for registers other than GPR as well. */
3714 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3715 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3716 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3717 && enmGstReg <= kIemNativeGstReg_GprLast)
3718 || enmGstReg == kIemNativeGstReg_MxCsr))
3719 {
3720# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3721 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3722 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3723# endif
3724 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3725 }
3726#endif
3727
3728 return idxReg;
3729 }
3730
3731 /*
3732      * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3733 */
3734 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3735
3736 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3737 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3738
3739 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3740 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3741 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3742 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3743
3744#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3745 /** @todo r=aeichner Implement for registers other than GPR as well. */
3746 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3747 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3748 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3749 && enmGstReg <= kIemNativeGstReg_GprLast)
3750 || enmGstReg == kIemNativeGstReg_MxCsr))
3751 {
3752# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3753 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3754 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3755# endif
3756 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3757 }
3758#endif
3759
3760 return idxRegNew;
3761}
3762
3763
3764/**
3765 * Allocates a temporary host general purpose register that already holds the
3766 * given guest register value.
3767 *
3768 * The use case for this function is places where the shadowing state cannot be
3769 * modified due to branching and such. This will fail if we don't have a
3770 * current shadow copy handy or if it's incompatible. The only code that will
3771 * be emitted here is value checking code in strict builds.
3772 *
3773 * The intended use can only be readonly!
3774 *
3775 * @returns The host register number, UINT8_MAX if not present.
3776 * @param pReNative The native recompile state.
3777 * @param poff Pointer to the instruction buffer offset.
3778 * Will be updated in strict builds if a register is
3779 * found.
3780 * @param enmGstReg The guest register that is to be updated.
3781 * @note In strict builds, this may throw instruction buffer growth failures.
3782 * Non-strict builds will not throw anything.
3783 * @sa iemNativeRegAllocTmpForGuestReg
3784 */
3785DECL_HIDDEN_THROW(uint8_t)
3786iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3787{
3788 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3789#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3790 AssertMsg( pReNative->idxCurCall == 0
3791 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3792 || enmGstReg == kIemNativeGstReg_Pc,
3793 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3794#endif
3795
3796 /*
3797 * First check if the guest register value is already in a host register.
3798 */
3799 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3800 {
3801 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3802 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3803 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3804 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3805
3806 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3807 {
3808 /*
3809 * We only do readonly use here, so easy compared to the other
3810 * variant of this code.
3811 */
3812 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3813 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3814 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3815 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3816 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3817
3818#ifdef VBOX_STRICT
3819 /* Strict builds: Check that the value is correct. */
3820 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3821#else
3822 RT_NOREF(poff);
3823#endif
3824 return idxReg;
3825 }
3826 }
3827
3828 return UINT8_MAX;
3829}
3830
3831
3832/**
3833 * Allocates argument registers for a function call.
3834 *
3835 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3836 * need to check the return value.
3837 * @param pReNative The native recompile state.
3838 * @param off The current code buffer offset.
3839 * @param cArgs The number of arguments the function call takes.
3840 */
3841DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3842{
3843 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3844 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3845 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3846 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3847
3848 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3849 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3850 else if (cArgs == 0)
3851 return true;
3852
3853 /*
3854      * Do we get lucky and all registers are free and not shadowing anything?
3855 */
3856 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3857 for (uint32_t i = 0; i < cArgs; i++)
3858 {
3859 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3860 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3861 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3862 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3863 }
3864 /*
3865 * Okay, not lucky so we have to free up the registers.
3866 */
3867 else
3868 for (uint32_t i = 0; i < cArgs; i++)
3869 {
3870 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3871 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3872 {
3873 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3874 {
3875 case kIemNativeWhat_Var:
3876 {
3877 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3878 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3879 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
3880 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3881 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
3882#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3883 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3884#endif
3885
3886 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
3887 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3888 else
3889 {
3890 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3891 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3892 }
3893 break;
3894 }
3895
3896 case kIemNativeWhat_Tmp:
3897 case kIemNativeWhat_Arg:
3898 case kIemNativeWhat_rc:
3899 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3900 default:
3901 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3902 }
3903
3904 }
3905 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3906 {
3907 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3908 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3909 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3910#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3911 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3912#endif
3913 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3914 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3915 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3916 }
3917 else
3918 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3919 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3920 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3921 }
3922 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3923 return true;
3924}
3925
3926
3927DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3928
3929
3930#if 0
3931/**
3932 * Frees a register assignment of any type.
3933 *
3934 * @param pReNative The native recompile state.
3935 * @param idxHstReg The register to free.
3936 *
3937 * @note Does not update variables.
3938 */
3939DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3940{
3941 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3942 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3943 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3944 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3945 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3946 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3947 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3948 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3949 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3950 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3951 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3952 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3953 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3954 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3955
3956 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3957 /* no flushing, right:
3958 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3959 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3960 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3961 */
3962}
3963#endif
3964
3965
3966/**
3967 * Frees a temporary register.
3968 *
3969 * Any shadow copies of guest registers assigned to the host register will not
3970 * be flushed by this operation.
3971 */
3972DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3973{
3974 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3975 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3976 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3977 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3978 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3979}
3980
3981
3982/**
3983 * Frees a temporary immediate register.
3984 *
3985 * It is assumed that the caller has not modified the register, so it still holds
3986 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3987 */
3988DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3989{
3990 iemNativeRegFreeTmp(pReNative, idxHstReg);
3991}
3992
3993
3994/**
3995 * Frees a register assigned to a variable.
3996 *
3997 * The register will be disassociated from the variable.
3998 */
3999DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4000{
4001 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4002 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4003 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4004 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4005 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4006#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4007 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4008#endif
4009
4010 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4011 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4012 if (!fFlushShadows)
4013 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4014 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4015 else
4016 {
4017 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4018 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4019#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4020 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
4021#endif
4022 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4023 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4024 uint64_t fGstRegShadows = fGstRegShadowsOld;
4025 while (fGstRegShadows)
4026 {
4027 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4028 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4029
4030 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4031 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4032 }
4033 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4034 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4035 }
4036}
4037
4038
4039#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4040# if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
4041/** Host CPU SIMD register names. */
4042DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4043{
4044# ifdef RT_ARCH_AMD64
4045 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4046# elif RT_ARCH_ARM64
4047 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4048 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4049# else
4050# error "port me"
4051# endif
4052};
4053# endif
4054
4055
4056/**
4057 * Frees a SIMD register assigned to a variable.
4058 *
4059 * The register will be disassociated from the variable.
4060 */
4061DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4062{
4063 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4064 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4065 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4066 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4067 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4068 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4069
4070 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4071 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4072 if (!fFlushShadows)
4073 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4074 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4075 else
4076 {
4077 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4078 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4079 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4080 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4081 uint64_t fGstRegShadows = fGstRegShadowsOld;
4082 while (fGstRegShadows)
4083 {
4084 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4085 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4086
4087 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4088 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4089 }
4090 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4091 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4092 }
4093}
4094
4095
4096/**
4097 * Reassigns a variable to a different SIMD register specified by the caller.
4098 *
4099 * @returns The new code buffer position.
4100 * @param pReNative The native recompile state.
4101 * @param off The current code buffer position.
4102 * @param idxVar The variable index.
4103 * @param idxRegOld The old host register number.
4104 * @param idxRegNew The new host register number.
4105 * @param pszCaller The caller for logging.
4106 */
4107static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4108 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4109{
4110 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4111 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4112 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4113 RT_NOREF(pszCaller);
4114
4115 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4116 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4117 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4118
4119 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4120 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4121 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4122
4123 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4124 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4126
4127 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4128 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4129 else
4130 {
4131 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4132 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4133 }
4134
4135 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4136 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4137 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
4138 if (fGstRegShadows)
4139 {
4140 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4141 | RT_BIT_32(idxRegNew);
4142 while (fGstRegShadows)
4143 {
4144 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4145 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4146
4147 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4148 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4149 }
4150 }
4151
4152 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4153 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4154 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4155 return off;
4156}
4157
4158
4159/**
4160 * Moves a variable to a different register or spills it onto the stack.
4161 *
4162 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4163 * kinds can easily be recreated if needed later.
4164 *
4165 * @returns The new code buffer position.
4166 * @param pReNative The native recompile state.
4167 * @param off The current code buffer position.
4168 * @param idxVar The variable index.
4169 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4170 * call-volatile registers.
4171 */
4172DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4173 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4174{
4175 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4176 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4177 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4178 Assert(!pVar->fRegAcquired);
4179 Assert(!pVar->fSimdReg);
4180
4181 uint8_t const idxRegOld = pVar->idxReg;
4182 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4183 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4184 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4185 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4186 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4187 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4188 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4189 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4190 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4191 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4192
4193 /** @todo Add statistics on this.*/
4194 /** @todo Implement basic variable liveness analysis (python) so variables
4195 * can be freed immediately once no longer used. This has the potential to
4196 * be trashing registers and stack for dead variables.
4197 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4198
4199 /*
4200 * First try move it to a different register, as that's cheaper.
4201 */
4202 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4203 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4204 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4205 if (fRegs)
4206 {
4207 /* Avoid using shadow registers, if possible. */
4208 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4209 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4210 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4211 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4212 }
4213
4214 /*
4215 * Otherwise we must spill the register onto the stack.
4216 */
4217 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4218 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4219 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4220
4221 if (pVar->cbVar == sizeof(RTUINT128U))
4222 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4223 else
4224 {
4225 Assert(pVar->cbVar == sizeof(RTUINT256U));
4226 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4227 }
4228
4229 pVar->idxReg = UINT8_MAX;
4230 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4231 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4232 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4233 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4234 return off;
4235}
4236
4237
4238/**
4239 * Called right before emitting a call instruction to move anything important
4240 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD
4241 * registers, and optionally free argument variables.
4242 *
4243 * @returns New code buffer offset, UINT32_MAX on failure.
4244 * @param pReNative The native recompile state.
4245 * @param off The code buffer offset.
4246 * @param cArgs The number of arguments the function call takes.
4247 * It is presumed that the host register part of these have
4248 * been allocated as such already and won't need moving,
4249 * just freeing.
4250 * @param fKeepVars Mask of variables that should keep their register
4251 * assignments. Caller must take care to handle these.
4252 */
4253DECL_HIDDEN_THROW(uint32_t)
4254iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4255{
4256 Assert(!cArgs); RT_NOREF(cArgs);
4257
4258 /* fKeepVars will reduce this mask. */
4259 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4260
4261 /*
4262 * Move anything important out of volatile registers.
4263 */
4264 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4265#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4266 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4267#endif
4268 ;
4269
4270 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4271 if (!fSimdRegsToMove)
4272 { /* likely */ }
4273 else
4274 {
4275 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4276 while (fSimdRegsToMove != 0)
4277 {
4278 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4279 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4280
4281 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4282 {
4283 case kIemNativeWhat_Var:
4284 {
4285 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4286 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4287 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4288 Assert(pVar->idxReg == idxSimdReg);
4289 Assert(pVar->fSimdReg);
4290 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4291 {
4292 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4293 idxVar, pVar->enmKind, pVar->idxReg));
4294 if (pVar->enmKind != kIemNativeVarKind_Stack)
4295 pVar->idxReg = UINT8_MAX;
4296 else
4297 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4298 }
4299 else
4300 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4301 continue;
4302 }
4303
4304 case kIemNativeWhat_Arg:
4305 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4306 continue;
4307
4308 case kIemNativeWhat_rc:
4309 case kIemNativeWhat_Tmp:
4310 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4311 continue;
4312
4313 case kIemNativeWhat_FixedReserved:
4314#ifdef RT_ARCH_ARM64
4315 continue; /* On ARM the upper half of the virtual 256-bit register. */
4316#endif
4317
4318 case kIemNativeWhat_FixedTmp:
4319 case kIemNativeWhat_pVCpuFixed:
4320 case kIemNativeWhat_pCtxFixed:
4321 case kIemNativeWhat_PcShadow:
4322 case kIemNativeWhat_Invalid:
4323 case kIemNativeWhat_End:
4324 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4325 }
4326 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4327 }
4328 }
4329
4330 /*
4331 * Do the actual freeing.
4332 */
4333 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4334 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4335 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4336 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4337
4338 /* If there are guest register shadows in any call-volatile register, we
4339 have to clear the corresponding guest register masks for each register. */
4340 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4341 if (fHstSimdRegsWithGstShadow)
4342 {
4343 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4344 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4345 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4346 do
4347 {
4348 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4349 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4350
4351 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4352
4353#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4354 /*
4355 * Flush any pending writes now (they might have been skipped earlier in iemEmitCallCommon(),
4356 * but that skipping doesn't apply to call-volatile registers).
4357 */
4358 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4359 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4360 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4361#endif
4362 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4363 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4364
4365 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4366 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4367 } while (fHstSimdRegsWithGstShadow != 0);
4368 }
4369
4370 return off;
4371}
4372#endif
4373
4374
4375/**
4376 * Called right before emitting a call instruction to move anything important
4377 * out of call-volatile registers, free and flush the call-volatile registers,
4378 * optionally freeing argument variables.
4379 *
4380 * @returns New code buffer offset, UINT32_MAX on failure.
4381 * @param pReNative The native recompile state.
4382 * @param off The code buffer offset.
4383 * @param cArgs The number of arguments the function call takes.
4384 * It is presumed that the host register part of these has
4385 * been allocated as such already and won't need moving,
4386 * just freeing.
4387 * @param fKeepVars Mask of variables that should keep their register
4388 * assignments. Caller must take care to handle these.
4389 */
4390DECL_HIDDEN_THROW(uint32_t)
4391iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4392{
4393 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4394
4395 /* fKeepVars will reduce this mask. */
4396 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4397
4398 /*
4399 * Move anything important out of volatile registers.
4400 */
4401 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4402 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4403 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4404#ifdef IEMNATIVE_REG_FIXED_TMP0
4405 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4406#endif
4407#ifdef IEMNATIVE_REG_FIXED_TMP1
4408 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4409#endif
4410#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4411 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4412#endif
4413 & ~g_afIemNativeCallRegs[cArgs];
4414
4415 fRegsToMove &= pReNative->Core.bmHstRegs;
4416 if (!fRegsToMove)
4417 { /* likely */ }
4418 else
4419 {
4420 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4421 while (fRegsToMove != 0)
4422 {
4423 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4424 fRegsToMove &= ~RT_BIT_32(idxReg);
4425
4426 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4427 {
4428 case kIemNativeWhat_Var:
4429 {
4430 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4431 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4432 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4433 Assert(pVar->idxReg == idxReg);
4434#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4435 Assert(!pVar->fSimdReg);
4436#endif
4437 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4438 {
4439 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4440 idxVar, pVar->enmKind, pVar->idxReg));
4441 if (pVar->enmKind != kIemNativeVarKind_Stack)
4442 pVar->idxReg = UINT8_MAX;
4443 else
4444 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4445 }
4446 else
4447 fRegsToFree &= ~RT_BIT_32(idxReg);
4448 continue;
4449 }
4450
4451 case kIemNativeWhat_Arg:
4452 AssertMsgFailed(("What?!?: %u\n", idxReg));
4453 continue;
4454
4455 case kIemNativeWhat_rc:
4456 case kIemNativeWhat_Tmp:
4457 AssertMsgFailed(("Missing free: %u\n", idxReg));
4458 continue;
4459
4460 case kIemNativeWhat_FixedTmp:
4461 case kIemNativeWhat_pVCpuFixed:
4462 case kIemNativeWhat_pCtxFixed:
4463 case kIemNativeWhat_PcShadow:
4464 case kIemNativeWhat_FixedReserved:
4465 case kIemNativeWhat_Invalid:
4466 case kIemNativeWhat_End:
4467 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4468 }
4469 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4470 }
4471 }
4472
4473 /*
4474 * Do the actual freeing.
4475 */
4476 if (pReNative->Core.bmHstRegs & fRegsToFree)
4477 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4478 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4479 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4480
4481 /* If there are guest register shadows in any call-volatile register, we
4482 have to clear the corresponding guest register masks for each register. */
4483 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4484 if (fHstRegsWithGstShadow)
4485 {
4486 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4487 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4488 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4489 do
4490 {
4491 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4492 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4493
4494 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4495
4496#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4497 /*
4498 * Flush any pending writes now (they might have been skipped earlier in iemEmitCallCommon(),
4499 * but that skipping doesn't apply to call-volatile registers).
4500 */
4501 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4502 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4503 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4504#endif
4505
4506 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4507 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4508 } while (fHstRegsWithGstShadow != 0);
4509 }
4510
4511#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4512 /* Now for the SIMD registers, no argument support for now. */
4513 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4514#endif
4515
4516 return off;
4517}
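
/*
 * Usage sketch (illustrative only, not taken from a real caller): an emitter
 * producing a helper call would typically flush the call-volatile GPRs first
 * and drop the guest shadows the helper may invalidate afterwards.  The
 * argument count of 2 below is hypothetical.
 *
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2);
 *      // ... load the argument registers and emit the actual call here ...
 *      iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); // helper may have changed guest state
 */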
4518
4519
4520/**
4521 * Flushes a set of guest register shadow copies.
4522 *
4523 * This is usually done after calling a threaded function or a C-implementation
4524 * of an instruction.
4525 *
4526 * @param pReNative The native recompile state.
4527 * @param fGstRegs Set of guest registers to flush.
4528 */
4529DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4530{
4531 /*
4532 * Reduce the mask by what's currently shadowed
4533 */
4534 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4535 fGstRegs &= bmGstRegShadowsOld;
4536 if (fGstRegs)
4537 {
4538 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4539 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4540 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4541 if (bmGstRegShadowsNew)
4542 {
4543 /*
4544 * Partial.
4545 */
4546 do
4547 {
4548 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4549 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4550 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4551 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4552 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4553#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4554 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4555#endif
4556
4557 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4558 fGstRegs &= ~fInThisHstReg;
4559 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4560 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4561 if (!fGstRegShadowsNew)
4562 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4563 } while (fGstRegs != 0);
4564 }
4565 else
4566 {
4567 /*
4568 * Clear all.
4569 */
4570 do
4571 {
4572 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4573 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4574 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4575 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4576 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4577#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4578 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4579#endif
4580
4581 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4582 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4583 } while (fGstRegs != 0);
4584 pReNative->Core.bmHstRegsWithGstShadow = 0;
4585 }
4586 }
4587}
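
/*
 * Usage sketch (illustrative): after a call to a C-implementation helper the
 * recompiler drops the shadow copies of whatever guest registers the helper
 * may have written.  The mask below is hypothetical; real callers derive it
 * from the threaded function's register usage.
 *
 *      uint64_t fGstRegsClobbered = ...; // e.g. the GPRs and EFLAGS the helper writes
 *      iemNativeRegFlushGuestShadows(pReNative, fGstRegsClobbered);
 */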
4588
4589
4590/**
4591 * Flushes guest register shadow copies held by a set of host registers.
4592 *
4593 * This is used with the TLB lookup code for ensuring that we don't carry on
4594 * with any guest shadows in volatile registers, as these will get corrupted by
4595 * a TLB miss.
4596 *
4597 * @param pReNative The native recompile state.
4598 * @param fHstRegs Set of host registers to flush guest shadows for.
4599 */
4600DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4601{
4602 /*
4603 * Reduce the mask by what's currently shadowed.
4604 */
4605 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4606 fHstRegs &= bmHstRegsWithGstShadowOld;
4607 if (fHstRegs)
4608 {
4609 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4610 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4611 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4612 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4613 if (bmHstRegsWithGstShadowNew)
4614 {
4615 /*
4616 * Partial (likely).
4617 */
4618 uint64_t fGstShadows = 0;
4619 do
4620 {
4621 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4622 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4623 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4624 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4625#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4626 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4627#endif
4628
4629 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4630 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4631 fHstRegs &= ~RT_BIT_32(idxHstReg);
4632 } while (fHstRegs != 0);
4633 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4634 }
4635 else
4636 {
4637 /*
4638 * Clear all.
4639 */
4640 do
4641 {
4642 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4643 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4644 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4645 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4646#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4647 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4648#endif
4649
4650 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4651 fHstRegs &= ~RT_BIT_32(idxHstReg);
4652 } while (fHstRegs != 0);
4653 pReNative->Core.bmGstRegShadows = 0;
4654 }
4655 }
4656}
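
/*
 * Usage sketch (illustrative): ahead of a TLB lookup sequence the emitter can
 * drop all guest shadows living in call-volatile host registers, since a TLB
 * miss helper call will clobber those registers:
 *
 *      iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 */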
4657
4658
4659/**
4660 * Restores guest shadow copies in volatile registers.
4661 *
4662 * This is used after calling a helper function (think TLB miss) to restore the
4663 * register state of volatile registers.
4664 *
4665 * @param pReNative The native recompile state.
4666 * @param off The code buffer offset.
4667 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4668 * be active (allocated) w/o asserting. Hack.
4669 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4670 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4671 */
4672DECL_HIDDEN_THROW(uint32_t)
4673iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4674{
4675 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4676 if (fHstRegs)
4677 {
4678 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4679 do
4680 {
4681 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4682
4683 /* It's not fatal if a register is active holding a variable that
4684 shadows a guest register, ASSUMING all pending guest register
4685 writes were flushed prior to the helper call. However, we'll be
4686 emitting duplicate restores, so it wastes code space. */
4687 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4688 RT_NOREF(fHstRegsActiveShadows);
4689
4690 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4691#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4692 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4693#endif
4694 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4695 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4696 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4697
4698 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4699 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4700
4701 fHstRegs &= ~RT_BIT_32(idxHstReg);
4702 } while (fHstRegs != 0);
4703 }
4704 return off;
4705}
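
/*
 * Usage sketch (illustrative): after emitting a TLB-miss helper call the
 * emitter reloads whatever guest shadows were live in volatile registers so
 * the fast path can keep using them; passing 0 means no host register is
 * expected to still be active while holding a shadow:
 *
 *      // ... helper call emitted; volatile registers now hold garbage ...
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
 */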
4706
4707
4708
4709
4710/*********************************************************************************************************************************
4711* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4712*********************************************************************************************************************************/
4713#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4714
4715/**
4716 * Info about shadowed guest SIMD register values.
4717 * @see IEMNATIVEGSTSIMDREG
4718 */
4719static struct
4720{
4721 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4722 uint32_t offXmm;
4723 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4724 uint32_t offYmm;
4725 /** Name (for logging). */
4726 const char *pszName;
4727} const g_aGstSimdShadowInfo[] =
4728{
4729#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4730 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4731 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4732 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4733 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4734 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4735 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4736 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4737 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4738 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4739 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4740 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4741 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4742 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4743 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4744 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4745 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4746 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4747#undef CPUMCTX_OFF_AND_SIZE
4748};
4749AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
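
/*
 * Layout note (illustrative): the two offsets in the table above are what the
 * SIMD load/store emitters use to address the guest context.  Writing back the
 * low half of a shadowed register, for instance, boils down to something like:
 *
 *      off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg,
 *                                                      g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
 *
 * See iemNativeSimdRegFlushPendingWrite() below for the real use.
 */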
4750
4751
4752/**
4753 * Frees a temporary SIMD register.
4754 *
4755 * Any shadow copies of guest registers assigned to the host register will not
4756 * be flushed by this operation.
4757 */
4758DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4759{
4760 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4761 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4762 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4763 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4764 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4765}
4766
4767
4768/**
4769 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
4770 *
4771 * @returns New code buffer offset.
4772 * @param pReNative The native recompile state.
4773 * @param off Current code buffer position.
4774 * @param enmGstSimdReg The guest SIMD register to flush.
4775 */
4776DECL_HIDDEN_THROW(uint32_t)
4777iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4778{
4779 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4780
4781 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4782 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4783 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4784 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4785
4786 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4787 {
4788 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4789 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4790 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4791 }
4792
4793 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4794 {
4795 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4796 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4797 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4798 }
4799
4800 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4801 return off;
4802}
4803
4804
4805/**
4806 * Flush the given set of guest SIMD registers if marked as dirty.
4807 *
4808 * @returns New code buffer offset.
4809 * @param pReNative The native recompile state.
4810 * @param off Current code buffer position.
4811 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
4812 */
4813DECL_HIDDEN_THROW(uint32_t)
4814iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4815{
4816 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4817 & fFlushGstSimdReg;
4818 if (bmGstSimdRegShadowDirty)
4819 {
4820# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4821 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4822 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4823# endif
4824
4825 do
4826 {
4827 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4828 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4829 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4830 } while (bmGstSimdRegShadowDirty);
4831 }
4832
4833 return off;
4834}
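
/*
 * Usage sketch (illustrative): before emitting code that accesses the guest
 * SIMD state directly in CPUMCTX (e.g. a helper call), any dirty shadow
 * copies can be written back first.  Flushing everything:
 *
 *      off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, UINT64_MAX);
 *
 * or just one register (index 0 is an arbitrary example):
 *
 *      off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, RT_BIT_64(kIemNativeGstSimdReg_SimdRegFirst + 0));
 */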
4835
4836
4837#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4838/**
4839 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
4840 *
4841 * @returns New code buffer offset.
4842 * @param pReNative The native recompile state.
4843 * @param off Current code buffer position.
4844 * @param idxHstSimdReg The host SIMD register.
4845 *
4846 * @note This doesn't do any unshadowing of guest registers from the host register.
4847 */
4848DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
4849{
4850 /* We need to flush any pending guest register writes this host register shadows. */
4851 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4852 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
4853 if (bmGstSimdRegShadowDirty)
4854 {
4855# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4856 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4857 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4858# endif
4859
4860 do
4861 {
4862 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4863 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4864 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4865 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
4866 } while (bmGstSimdRegShadowDirty);
4867 }
4868
4869 return off;
4870}
4871#endif
4872
4873
4874/**
4875 * Locate a register, possibly freeing one up.
4876 *
4877 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4878 * failed.
4879 *
4880 * @returns Host register number on success. Returns UINT8_MAX if no registers
4881 * found, the caller is supposed to deal with this and raise an
4882 * allocation-type specific status code (if desired).
4883 *
4884 * @throws VBox status code if we run into trouble spilling a variable or
4885 * recording debug info. Does NOT throw anything if we're out of
4886 * registers, though.
4887 */
4888static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4889 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4890{
4891 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
4892 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4893 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4894
4895 /*
4896 * Try a freed register that's shadowing a guest register.
4897 */
4898 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4899 if (fRegs)
4900 {
4901 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
4902
4903#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4904 /*
4905 * When we have liveness information, we use it to kick out all shadowed
4906 * guest registers that will not be needed any more in this TB. If we're
4907 * lucky, this may prevent us from ending up here again.
4908 *
4909 * Note! We must consider the previous entry here so we don't free
4910 * anything that the current threaded function requires (current
4911 * entry is produced by the next threaded function).
4912 */
4913 uint32_t const idxCurCall = pReNative->idxCurCall;
4914 if (idxCurCall > 0)
4915 {
4916 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4917
4918# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4919 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4920 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4921 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state. */
4922#else
4923 /* Construct a mask of the registers not in the read or write state.
4924 Note! We could skip writes, if they aren't from us, as this is just
4925 a hack to prevent trashing registers that have just been written
4926 or will be written when we retire the current instruction. */
4927 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4928 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4929 & IEMLIVENESSBIT_MASK;
4930#endif
4931 /* If it matches any shadowed registers. */
4932 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4933 {
4934 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
4935 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4936 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4937
4938 /* See if we've got any unshadowed registers we can return now. */
4939 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4940 if (fUnshadowedRegs)
4941 {
4942 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
4943 return (fPreferVolatile
4944 ? ASMBitFirstSetU32(fUnshadowedRegs)
4945 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4946 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4947 - 1;
4948 }
4949 }
4950 }
4951#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4952
4953 unsigned const idxReg = (fPreferVolatile
4954 ? ASMBitFirstSetU32(fRegs)
4955 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4956 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
4957 - 1;
4958
4959 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
4960 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
4961 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4962 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
4963
4964 /* We need to flush any pending guest register writes this host SIMD register shadows. */
4965 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
4966
4967 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4968 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4969 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4970 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4971 return idxReg;
4972 }
4973
4974 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
4975
4976 /*
4977 * Try free up a variable that's in a register.
4978 *
4979 * We do two rounds here, first evacuating variables we don't need to be
4980 * saved on the stack, then in the second round move things to the stack.
4981 */
4982 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
4983 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4984 {
4985 uint32_t fVars = pReNative->Core.bmVars;
4986 while (fVars)
4987 {
4988 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4989 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4990 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non-SIMD variables here. */
4991 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit so the loop terminates. */
4992
4993 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
4994 && (RT_BIT_32(idxReg) & fRegMask)
4995 && ( iLoop == 0
4996 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4997 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4998 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4999 {
5000 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5001 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5002 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5003 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5004 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5005 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5006
5007 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5008 {
5009 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5010 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5011 }
5012
5013 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5014 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5015
5016 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5017 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5018 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5019 return idxReg;
5020 }
5021 fVars &= ~RT_BIT_32(idxVar);
5022 }
5023 }
5024
5025 AssertFailed();
5026 return UINT8_MAX;
5027}
5028
5029
5030/**
5031 * Flushes a set of guest register shadow copies.
5032 *
5033 * This is usually done after calling a threaded function or a C-implementation
5034 * of an instruction.
5035 *
5036 * @param pReNative The native recompile state.
5037 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5038 */
5039DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5040{
5041 /*
5042 * Reduce the mask by what's currently shadowed
5043 */
5044 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5045 fGstSimdRegs &= bmGstSimdRegShadows;
5046 if (fGstSimdRegs)
5047 {
5048 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5049 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5050 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5051 if (bmGstSimdRegShadowsNew)
5052 {
5053 /*
5054 * Partial.
5055 */
5056 do
5057 {
5058 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5059 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5060 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5061 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5062 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5063 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5064
5065 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5066 fGstSimdRegs &= ~fInThisHstReg;
5067 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5068 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5069 if (!fGstRegShadowsNew)
5070 {
5071 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5072 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5073 }
5074 } while (fGstSimdRegs != 0);
5075 }
5076 else
5077 {
5078 /*
5079 * Clear all.
5080 */
5081 do
5082 {
5083 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5084 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5085 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5086 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5087 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5088 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5089
5090 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5091 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5092 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5093 } while (fGstSimdRegs != 0);
5094 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5095 }
5096 }
5097}
5098
5099
5100/**
5101 * Allocates a temporary host SIMD register.
5102 *
5103 * This may emit code to save register content onto the stack in order to free
5104 * up a register.
5105 *
5106 * @returns The host register number; throws VBox status code on failure,
5107 * so no need to check the return value.
5108 * @param pReNative The native recompile state.
5109 * @param poff Pointer to the variable with the code buffer position.
5110 * This will be updated if we need to move a variable from
5111 * register to stack in order to satisfy the request.
5112 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5113 * registers (@c true, default) or the other way around
5114 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5115 */
5116DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5117{
5118 /*
5119 * Try find a completely unused register, preferably a call-volatile one.
5120 */
5121 uint8_t idxSimdReg;
5122 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5123 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5124 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5125 if (fRegs)
5126 {
5127 if (fPreferVolatile)
5128 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5129 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5130 else
5131 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5132 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5133 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5134 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5135
5136 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5137 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5138 }
5139 else
5140 {
5141 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5142 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5143 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5144 }
5145
5146 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5147 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5148}
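
/*
 * Usage sketch (illustrative): a scratch SIMD register for an intermediate
 * result, released once the emitted sequence no longer needs it:
 *
 *      uint8_t const idxSimdRegTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      // ... emit instructions that use idxSimdRegTmp ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdRegTmp);
 */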
5149
5150
5151/**
5152 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5153 * registers.
5154 *
5155 * @returns The host register number; throws VBox status code on failure,
5156 * so no need to check the return value.
5157 * @param pReNative The native recompile state.
5158 * @param poff Pointer to the variable with the code buffer position.
5159 * This will be updated if we need to move a variable from
5160 * register to stack in order to satisfy the request.
5161 * @param fRegMask Mask of acceptable registers.
5162 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5163 * registers (@c true, default) or the other way around
5164 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5165 */
5166DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5167 bool fPreferVolatile /*= true*/)
5168{
5169 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5170 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5171
5172 /*
5173 * Try find a completely unused register, preferably a call-volatile one.
5174 */
5175 uint8_t idxSimdReg;
5176 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5177 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5178 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5179 & fRegMask;
5180 if (fRegs)
5181 {
5182 if (fPreferVolatile)
5183 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5184 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5185 else
5186 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5187 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5188 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5189 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5190
5191 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5192 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5193 }
5194 else
5195 {
5196 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5197 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5198 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5199 }
5200
5201 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5202 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5203}
5204
5205
5206/**
5207 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5208 *
5209 * @param pReNative The native recompile state.
5210 * @param idxHstSimdReg The host SIMD register to update the state for.
5211 * @param enmLoadSz The load size to set.
5212 */
5213DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5214 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5215{
5216 /* Everything valid already? -> nothing to do. */
5217 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5218 return;
5219
5220 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5221 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5222 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5223 {
5224 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5225 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5226 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5227 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5228 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5229 }
5230}
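
/*
 * State transitions (illustrative summary of the logic above):
 *
 *      Invalid  --Low128-->   Low128   --High128-->  256
 *      Invalid  --High128-->  High128  --Low128--->  256
 *      256      --any size->  256   (already fully valid, nothing to do)
 */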
5231
5232
5233static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5234 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5235{
5236 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5237 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5238 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5239 {
5240# ifdef RT_ARCH_ARM64
5241 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5242 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5243# endif
5244
5245 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5246 {
5247 switch (enmLoadSzDst)
5248 {
5249 case kIemNativeGstSimdRegLdStSz_256:
5250 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5251 break;
5252 case kIemNativeGstSimdRegLdStSz_Low128:
5253 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5254 break;
5255 case kIemNativeGstSimdRegLdStSz_High128:
5256 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5257 break;
5258 default:
5259 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5260 }
5261
5262 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5263 }
5264 }
5265 else
5266 {
5267 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5268 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5269 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5270 }
5271
5272 return off;
5273}
5274
5275
5276/**
5277 * Allocates a temporary host SIMD register for keeping a guest
5278 * SIMD register value.
5279 *
5280 * Since we may already have a register holding the guest register value,
5281 * code will be emitted to do the loading if that's not the case. Code may also
5282 * be emitted if we have to free up a register to satisfy the request.
5283 *
5284 * @returns The host register number; throws VBox status code on failure, so no
5285 * need to check the return value.
5286 * @param pReNative The native recompile state.
5287 * @param poff Pointer to the variable with the code buffer
5288 * position. This will be updated if we need to move a
5289 * variable from register to stack in order to satisfy
5290 * the request.
5291 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5292 * @param enmIntendedUse How the caller will be using the host register.
5293 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5294 * register is okay (default). The ASSUMPTION here is
5295 * that the caller has already flushed all volatile
5296 * registers, so this is only applied if we allocate a
5297 * new register.
5298 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5299 */
5300DECL_HIDDEN_THROW(uint8_t)
5301iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5302 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5303 bool fNoVolatileRegs /*= false*/)
5304{
5305 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5306#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5307 AssertMsg( pReNative->idxCurCall == 0
5308 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5309 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5310 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5311 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5312 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5313 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5314#endif
5315#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5316 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5317#endif
5318 uint32_t const fRegMask = !fNoVolatileRegs
5319 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5320 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5321
5322 /*
5323 * First check if the guest register value is already in a host register.
5324 */
5325 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5326 {
5327 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5328 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5329 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5330 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5331
5332 /* It's not supposed to be allocated... */
5333 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5334 {
5335 /*
5336 * If the register will trash the guest shadow copy, try to find a
5337 * completely unused register we can use instead. If that fails,
5338 * we need to disassociate the host reg from the guest reg.
5339 */
5340 /** @todo would be nice to know if preserving the register is in any way helpful. */
5341 /* If the purpose is calculations, try to duplicate the register value as
5342 we'll be clobbering the shadow. */
5343 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5344 && ( ~pReNative->Core.bmHstSimdRegs
5345 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5346 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5347 {
5348 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5349
5350 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5351
5352 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5353 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5354 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5355 idxSimdReg = idxRegNew;
5356 }
5357 /* If the current register matches the restrictions, go ahead and allocate
5358 it for the caller. */
5359 else if (fRegMask & RT_BIT_32(idxSimdReg))
5360 {
5361 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5362 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5363 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5364 {
5365 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5366 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5367 else
5368 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5369 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5370 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5371 }
5372 else
5373 {
5374 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5375 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5376 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5377 }
5378 }
5379 /* Otherwise, allocate a register that satisfies the caller and transfer
5380 the shadowing if compatible with the intended use. (This basically
5381 means the call wants a non-volatile register (RSP push/pop scenario).) */
5382 else
5383 {
5384 Assert(fNoVolatileRegs);
5385 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5386 !fNoVolatileRegs
5387 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5388 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5389 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5390 {
5391 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5392 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5393 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5394 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5395 }
5396 else
5397 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5398 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5399 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5400 idxSimdReg = idxRegNew;
5401 }
5402 }
5403 else
5404 {
5405 /*
5406 * Oops. Shadowed guest register already allocated!
5407 *
5408 * Allocate a new register, copy the value and, if updating, the
5409 * guest shadow copy assignment to the new register.
5410 */
5411 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5412 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5413 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5414 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5415
5416 /** @todo share register for readonly access. */
5417 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5418 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5419
5420 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5421 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5422 else
5423 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5424
5425 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5426 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5427 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5428 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5429 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5430 else
5431 {
5432 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5433 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5434 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5435 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5436 }
5437 idxSimdReg = idxRegNew;
5438 }
5439 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5440
5441#ifdef VBOX_STRICT
5442 /* Strict builds: Check that the value is correct. */
5443 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5444 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5445#endif
5446
5447 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5448 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5449 {
5450# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5451 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5452 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5453# endif
5454
5455 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5456 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5457 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5458 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5459 else
5460 {
5461 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5462 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5463 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5464 }
5465 }
5466
5467 return idxSimdReg;
5468 }
5469
5470 /*
5471 * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5472 */
5473 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5474
5475 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5476 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5477 else
5478 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5479
5480 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5481 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5482
5483 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5484 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5485 {
5486# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5487 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5488 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5489# endif
5490
5491 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5492 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5493 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5494 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5495 else
5496 {
5497 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5498 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5499 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5500 }
5501 }
5502
5503 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5504 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5505
5506 return idxRegNew;
5507}
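
/*
 * Usage sketch (illustrative): a read-modify-write of the low 128 bits of a
 * guest SIMD register.  The register index iYRegDst is hypothetical; the
 * enums and helpers are the ones used above.
 *
 *      uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                            IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
 *                                                                            kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                            kIemNativeGstRegUse_ForUpdate);
 *      // ... emit the actual SIMD operation on idxSimdRegDst ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
 *
 * The dirty tracking set up above means the result is written back to CPUMCTX
 * lazily by iemNativeSimdRegFlushPendingWrite() or one of its callers.
 */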
5508
5509
5510/**
5511 * Flushes guest SIMD register shadow copies held by a set of host registers.
5512 *
5513 * This is used whenever calling an external helper for ensuring that we don't carry on
5514 * with any guest shadows in volatile registers, as these will get corrupted by the called helper.
5515 *
5516 * @param pReNative The native recompile state.
5517 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5518 */
5519DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5520{
5521 /*
5522 * Reduce the mask by what's currently shadowed.
5523 */
5524 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5525 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5526 if (fHstSimdRegs)
5527 {
5528 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5529 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5530 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5531 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5532 if (bmHstSimdRegsWithGstShadowNew)
5533 {
5534 /*
5535 * Partial (likely).
5536 */
5537 uint64_t fGstShadows = 0;
5538 do
5539 {
5540 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5541 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5542 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5543 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5544 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5545 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5546
5547 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5548 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5549 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5550 } while (fHstSimdRegs != 0);
5551 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5552 }
5553 else
5554 {
5555 /*
5556 * Clear all.
5557 */
5558 do
5559 {
5560 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5561 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5562 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5563 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5564 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5565 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5566
5567 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5568 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5569 } while (fHstSimdRegs != 0);
5570 pReNative->Core.bmGstSimdRegShadows = 0;
5571 }
5572 }
5573}
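
/*
 * Usage sketch (illustrative only): a call-emitting path would typically drop the
 * shadows held in the volatile host SIMD registers right before emitting the helper
 * call, e.g. with a hypothetical fVolatileSimdMask covering the callee-clobbered set:
 *
 *     iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, fVolatileSimdMask);
 *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 *
 * The actual mask constant depends on the host calling convention.
 */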
5574#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5575
5576
5577
5578/*********************************************************************************************************************************
5579* Code emitters for flushing pending guest register writes and sanity checks *
5580*********************************************************************************************************************************/
5581
5582#ifdef VBOX_STRICT
5583/**
5584 * Does internal register allocator sanity checks.
5585 */
5586DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5587{
5588 /*
5589 * Iterate host registers building a guest shadowing set.
5590 */
5591 uint64_t bmGstRegShadows = 0;
5592 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5593 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5594 while (bmHstRegsWithGstShadow)
5595 {
5596 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5597 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5598 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5599
5600 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5601 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5602 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5603 bmGstRegShadows |= fThisGstRegShadows;
5604 while (fThisGstRegShadows)
5605 {
5606 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5607 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5608 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5609 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5610 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5611 }
5612 }
5613 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5614 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5615 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5616
5617 /*
5618 * Now the other way around, checking the guest to host index array.
5619 */
5620 bmHstRegsWithGstShadow = 0;
5621 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5622 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5623 while (bmGstRegShadows)
5624 {
5625 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5626 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5627 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5628
5629 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5630 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5631 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5632 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5633 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5634 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5635 }
5636 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5637 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5638 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5639}
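
/*
 * Informal statement of the invariant verified above (sketch; the code above is
 * authoritative):
 *
 *     for each host reg H with its bit set in bmHstRegsWithGstShadow:
 *         for each guest reg G set in aHstRegs[H].fGstRegShadows:
 *             aidxGstRegShadows[G] == H  and  RT_BIT_64(G) is set in bmGstRegShadows
 *
 * and conversely when starting from bmGstRegShadows.
 */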
5640#endif /* VBOX_STRICT */
5641
5642
5643/**
5644 * Flushes any delayed guest register writes.
5645 *
5646 * This must be called prior to calling CImpl functions and any helpers that use
5647 * the guest state (like raising exceptions) and such.
5648 *
5649 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5650 * the caller if it wishes to do so.
5651 */
5652DECL_HIDDEN_THROW(uint32_t)
5653iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5654{
5655#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5656 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
5657 off = iemNativeEmitPcWriteback(pReNative, off);
5658#else
5659 RT_NOREF(pReNative, fGstShwExcept);
5660#endif
5661
5662#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5663 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5664#endif
5665
5666#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5667 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5668#endif
5669
5670 return off;
5671}
5672
5673
5674#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5675/**
5676 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
5677 */
5678DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5679{
5680 Assert(pReNative->Core.offPc);
5681# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5682 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5683 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5684# endif
5685
5686# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5687 /* Allocate a temporary PC register. */
5688 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5689
5690 /* Perform the addition and store the result. */
5691 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5692 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5693
5694 /* Free but don't flush the PC register. */
5695 iemNativeRegFreeTmp(pReNative, idxPcReg);
5696# else
5697 /* Compare the shadow with the context value, they should match. */
5698 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5699 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5700# endif
5701
5702 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5703 pReNative->Core.offPc = 0;
5704 pReNative->Core.cInstrPcUpdateSkipped = 0;
5705
5706 return off;
5707}
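
/*
 * Roughly speaking, the non-debug path above emits the following (AMD64 flavour
 * shown for illustration; the initial load is elided if RIP is already shadowed):
 *
 *     mov  rTmp, [rVCpu + cpum.GstCtx.rip]     ; ForUpdate allocation
 *     add  rTmp, offPc
 *     mov  [rVCpu + cpum.GstCtx.rip], rTmp
 *
 * i.e. the accumulated instruction-length delta is folded into the guest RIP in one go.
 */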
5708#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5709
5710
5711/*********************************************************************************************************************************
5712* Code Emitters (larger snippets) *
5713*********************************************************************************************************************************/
5714
5715/**
5716 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5717 * extending to 64-bit width.
5718 *
5719 * @returns New code buffer offset on success, UINT32_MAX on failure.
5720 * @param pReNative The native recompile state.
5721 * @param off The current code buffer position.
5722 * @param idxHstReg The host register to load the guest register value into.
5723 * @param enmGstReg The guest register to load.
5724 *
5725 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg;
5726 * that is something the caller needs to do if applicable.
5727 */
5728DECL_HIDDEN_THROW(uint32_t)
5729iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5730{
5731 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5732 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5733
5734 switch (g_aGstShadowInfo[enmGstReg].cb)
5735 {
5736 case sizeof(uint64_t):
5737 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5738 case sizeof(uint32_t):
5739 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5740 case sizeof(uint16_t):
5741 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5742#if 0 /* not present in the table. */
5743 case sizeof(uint8_t):
5744 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5745#endif
5746 default:
5747 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5748 }
5749}
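
/*
 * Usage sketch (illustrative): callers typically use this to pull a guest value into a
 * scratch register without establishing shadowing, e.g.
 *
 *     off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
 *
 * as done by the strict-build value check further down.
 */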
5750
5751
5752#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5753/**
5754 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5755 *
5756 * @returns New code buffer offset on success, UINT32_MAX on failure.
5757 * @param pReNative The recompiler state.
5758 * @param off The current code buffer position.
5759 * @param idxHstSimdReg The host register to load the guest register value into.
5760 * @param enmGstSimdReg The guest register to load.
5761 * @param enmLoadSz The load size of the register.
5762 *
5763 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg;
5764 * that is something the caller needs to do if applicable.
5765 */
5766DECL_HIDDEN_THROW(uint32_t)
5767iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5768 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5769{
5770 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5771
5772 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5773 switch (enmLoadSz)
5774 {
5775 case kIemNativeGstSimdRegLdStSz_256:
5776 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5777 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5778 case kIemNativeGstSimdRegLdStSz_Low128:
5779 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5780 case kIemNativeGstSimdRegLdStSz_High128:
5781 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5782 default:
5783 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5784 }
5785}
5786#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5787
5788#ifdef VBOX_STRICT
5789
5790/**
5791 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5792 *
5793 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5794 * Trashes EFLAGS on AMD64.
5795 */
5796DECL_HIDDEN_THROW(uint32_t)
5797iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5798{
5799# ifdef RT_ARCH_AMD64
5800 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5801
5802 /* rol reg64, 32 */
5803 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5804 pbCodeBuf[off++] = 0xc1;
5805 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5806 pbCodeBuf[off++] = 32;
5807
5808 /* test reg32, ffffffffh */
5809 if (idxReg >= 8)
5810 pbCodeBuf[off++] = X86_OP_REX_B;
5811 pbCodeBuf[off++] = 0xf7;
5812 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5813 pbCodeBuf[off++] = 0xff;
5814 pbCodeBuf[off++] = 0xff;
5815 pbCodeBuf[off++] = 0xff;
5816 pbCodeBuf[off++] = 0xff;
5817
5818 /* je/jz +1 */
5819 pbCodeBuf[off++] = 0x74;
5820 pbCodeBuf[off++] = 0x01;
5821
5822 /* int3 */
5823 pbCodeBuf[off++] = 0xcc;
5824
5825 /* rol reg64, 32 */
5826 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5827 pbCodeBuf[off++] = 0xc1;
5828 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5829 pbCodeBuf[off++] = 32;
5830
5831# elif defined(RT_ARCH_ARM64)
5832 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5833 /* lsr tmp0, reg64, #32 */
5834 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5835 /* cbz tmp0, +1 */
5836 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5837 /* brk #0x1100 */
5838 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5839
5840# else
5841# error "Port me!"
5842# endif
5843 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5844 return off;
5845}
5846
5847
5848/**
5849 * Emits code that checks that the content of register @a idxReg is the same
5850 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5851 * instruction if that's not the case.
5852 *
5853 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5854 * Trashes EFLAGS on AMD64.
5855 */
5856DECL_HIDDEN_THROW(uint32_t)
5857iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5858{
5859#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5860 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5861 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
5862 return off;
5863#endif
5864
5865# ifdef RT_ARCH_AMD64
5866 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5867
5868 /* cmp reg, [mem] */
5869 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5870 {
5871 if (idxReg >= 8)
5872 pbCodeBuf[off++] = X86_OP_REX_R;
5873 pbCodeBuf[off++] = 0x38;
5874 }
5875 else
5876 {
5877 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5878 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5879 else
5880 {
5881 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5882 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5883 else
5884 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5885 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5886 if (idxReg >= 8)
5887 pbCodeBuf[off++] = X86_OP_REX_R;
5888 }
5889 pbCodeBuf[off++] = 0x39;
5890 }
5891 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5892
5893 /* je/jz +1 */
5894 pbCodeBuf[off++] = 0x74;
5895 pbCodeBuf[off++] = 0x01;
5896
5897 /* int3 */
5898 pbCodeBuf[off++] = 0xcc;
5899
5900 /* For values smaller than the register size, we must check that the rest
5901 of the register is all zeros. */
5902 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5903 {
5904 /* test reg64, imm32 */
5905 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5906 pbCodeBuf[off++] = 0xf7;
5907 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5908 pbCodeBuf[off++] = 0;
5909 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5910 pbCodeBuf[off++] = 0xff;
5911 pbCodeBuf[off++] = 0xff;
5912
5913 /* je/jz +1 */
5914 pbCodeBuf[off++] = 0x74;
5915 pbCodeBuf[off++] = 0x01;
5916
5917 /* int3 */
5918 pbCodeBuf[off++] = 0xcc;
5919 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5920 }
5921 else
5922 {
5923 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5924 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5925 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5926 }
5927
5928# elif defined(RT_ARCH_ARM64)
5929 /* mov TMP0, [gstreg] */
5930 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5931
5932 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5933 /* sub tmp0, tmp0, idxReg */
5934 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5935 /* cbz tmp0, +1 */
5936 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5937 /* brk #0x1000+enmGstReg */
5938 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5939 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5940
5941# else
5942# error "Port me!"
5943# endif
5944 return off;
5945}
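
/*
 * For reference, the ARM64 variant above expands to roughly this sequence (sketch;
 * the emitter calls are authoritative):
 *
 *     ldr  tmp0, [pVCpu + g_aGstShadowInfo[enmGstReg].off]
 *     sub  tmp0, tmp0, xReg
 *     cbz  tmp0, .Lok
 *     brk  #0x1000 + enmGstReg
 * .Lok:
 */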
5946
5947
5948# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5949# ifdef RT_ARCH_AMD64
5950/**
5951 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
5952 */
5953DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
5954{
5955 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5956 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5957 if (idxSimdReg >= 8)
5958 pbCodeBuf[off++] = X86_OP_REX_R;
5959 pbCodeBuf[off++] = 0x0f;
5960 pbCodeBuf[off++] = 0x38;
5961 pbCodeBuf[off++] = 0x29;
5962 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
5963
5964 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
5965 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5966 pbCodeBuf[off++] = X86_OP_REX_W
5967 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5968 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5969 pbCodeBuf[off++] = 0x0f;
5970 pbCodeBuf[off++] = 0x3a;
5971 pbCodeBuf[off++] = 0x16;
5972 pbCodeBuf[off++] = 0xeb;
5973 pbCodeBuf[off++] = 0x00;
5974
5975 /* cmp tmp0, 0xffffffffffffffff. */
5976 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5977 pbCodeBuf[off++] = 0x83;
5978 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5979 pbCodeBuf[off++] = 0xff;
5980
5981 /* je/jz +1 */
5982 pbCodeBuf[off++] = 0x74;
5983 pbCodeBuf[off++] = 0x01;
5984
5985 /* int3 */
5986 pbCodeBuf[off++] = 0xcc;
5987
5988 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
5989 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5990 pbCodeBuf[off++] = X86_OP_REX_W
5991 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5992 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5993 pbCodeBuf[off++] = 0x0f;
5994 pbCodeBuf[off++] = 0x3a;
5995 pbCodeBuf[off++] = 0x16;
5996 pbCodeBuf[off++] = 0xeb;
5997 pbCodeBuf[off++] = 0x01;
5998
5999 /* cmp tmp0, 0xffffffffffffffff. */
6000 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6001 pbCodeBuf[off++] = 0x83;
6002 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6003 pbCodeBuf[off++] = 0xff;
6004
6005 /* je/jz +1 */
6006 pbCodeBuf[off++] = 0x74;
6007 pbCodeBuf[off++] = 0x01;
6008
6009 /* int3 */
6010 pbCodeBuf[off++] = 0xcc;
6011
6012 return off;
6013}
6014# endif
6015
6016
6017/**
6018 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
6019 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6020 * instruction if that's not the case.
6021 *
6022 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6023 * Trashes EFLAGS on AMD64.
6024 */
6025DECL_HIDDEN_THROW(uint32_t)
6026iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6027 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6028{
6029 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6030 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6031 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6032 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6033 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6034 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6035 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6036 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6037 return off;
6038
6039# ifdef RT_ARCH_AMD64
6040 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6041 {
6042 /* movdqa vectmp0, idxSimdReg */
6043 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6044
6045 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6046
6047 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6048 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6049 }
6050
6051 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6052 {
6053 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6054 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6055
6056 /* vextracti128 vectmp0, idxSimdReg, 1 */
6057 pbCodeBuf[off++] = X86_OP_VEX3;
6058 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6059 | X86_OP_VEX3_BYTE1_X
6060 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6061 | 0x03; /* Opcode map */
6062 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6063 pbCodeBuf[off++] = 0x39;
6064 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6065 pbCodeBuf[off++] = 0x01;
6066
6067 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6068 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6069 }
6070# elif defined(RT_ARCH_ARM64)
6071 /* mov vectmp0, [gstreg] */
6072 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6073
6074 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6075 {
6076 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6077 /* eor vectmp0, vectmp0, idxSimdReg */
6078 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6079 /* uaddlv vectmp0, vectmp0.16B */
6080 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6081 /* umov tmp0, vectmp0.H[0] */
6082 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6083 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6084 /* cbz tmp0, +1 */
6085 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6086 /* brk #0x1000+enmGstReg */
6087 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6088 }
6089
6090 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6091 {
6092 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6093 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6094 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6095 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6096 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6097 /* umov tmp0, (vectmp0 + 1).H[0] */
6098 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6099 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6100 /* cbz tmp0, +1 */
6101 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6102 /* brk #0x1000+enmGstReg */
6103 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6104 }
6105
6106# else
6107# error "Port me!"
6108# endif
6109
6110 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6111 return off;
6112}
6113# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6114
6115
6116/**
6117 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
6118 * important bits.
6119 *
6120 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6121 * Trashes EFLAGS on AMD64.
6122 */
6123DECL_HIDDEN_THROW(uint32_t)
6124iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6125{
6126 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6127 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6128 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6129 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6130
6131#ifdef RT_ARCH_AMD64
6132 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6133
6134 /* je/jz +1 */
6135 pbCodeBuf[off++] = 0x74;
6136 pbCodeBuf[off++] = 0x01;
6137
6138 /* int3 */
6139 pbCodeBuf[off++] = 0xcc;
6140
6141# elif defined(RT_ARCH_ARM64)
6142 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6143
6144 /* b.eq +1 */
6145 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6146 /* brk #0x2000 */
6147 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6148
6149# else
6150# error "Port me!"
6151# endif
6152 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6153
6154 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6155 return off;
6156}
6157
6158#endif /* VBOX_STRICT */
6159
6160
6161#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6162/**
6163 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6164 */
6165DECL_HIDDEN_THROW(uint32_t)
6166iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6167{
6168 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6169
6170 fEflNeeded &= X86_EFL_STATUS_BITS;
6171 if (fEflNeeded)
6172 {
6173# ifdef RT_ARCH_AMD64
6174 /* test dword [pVCpu + offVCpu], imm32 */
6175 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
6176 if (fEflNeeded <= 0xff)
6177 {
6178 pCodeBuf[off++] = 0xf6;
6179 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6180 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6181 }
6182 else
6183 {
6184 pCodeBuf[off++] = 0xf7;
6185 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6186 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6187 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6188 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6189 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6190 }
6191
6192 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 3, kIemNativeInstrCond_e);
6193 pCodeBuf[off++] = 0xcc;
6194
6195 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6196
6197# else
6198 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6199 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6200 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6201# ifdef RT_ARCH_ARM64
6202 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6203 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6204# else
6205# error "Port me!"
6206# endif
6207 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6208# endif
6209 }
6210 return off;
6211}
6212#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6213
6214
6215/**
6216 * Emits a code for checking the return code of a call and rcPassUp, returning
6217 * from the code if either are non-zero.
6218 */
6219DECL_HIDDEN_THROW(uint32_t)
6220iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6221{
6222#ifdef RT_ARCH_AMD64
6223 /*
6224 * AMD64: eax = call status code.
6225 */
6226
6227 /* edx = rcPassUp */
6228 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6229# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6230 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6231# endif
6232
6233 /* edx = eax | rcPassUp */
6234 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6235 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6236 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6237 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6238
6239 /* Jump to non-zero status return path. */
6240 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6241
6242 /* done. */
6243
6244#elif RT_ARCH_ARM64
6245 /*
6246 * ARM64: w0 = call status code.
6247 */
6248# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6249 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6250# endif
6251 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6252
6253 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6254
6255 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6256
6257 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pu32CodeBuf, off, ARMV8_A64_REG_X4, true /*f64Bit*/,
6258 kIemNativeLabelType_NonZeroRetOrPassUp);
6259
6260#else
6261# error "port me"
6262#endif
6263 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6264 RT_NOREF_PV(idxInstr);
6265 return off;
6266}
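
/*
 * In C terms the emitted check amounts to (sketch, not the exact encoding):
 *
 *     if ((rcStrict | pVCpu->iem.s.rcPassUp) != 0)
 *         goto NonZeroRetOrPassUp;
 *
 * where rcStrict is the status code left in the return register (eax / w0) by the
 * preceding call.
 */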
6267
6268
6269/**
6270 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6271 * raising a \#GP(0) if it isn't.
6272 *
6273 * @returns New code buffer offset; throws VBox status code on error.
6274 * @param pReNative The native recompile state.
6275 * @param off The code buffer offset.
6276 * @param idxAddrReg The host register with the address to check.
6277 * @param idxInstr The current instruction.
6278 */
6279DECL_HIDDEN_THROW(uint32_t)
6280iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6281{
6282 /*
6283 * Make sure we don't have any outstanding guest register writes as we may
6284 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6285 */
6286 off = iemNativeRegFlushPendingWrites(pReNative, off);
6287
6288#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6289 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6290#else
6291 RT_NOREF(idxInstr);
6292#endif
6293
6294#ifdef RT_ARCH_AMD64
6295 /*
6296 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6297 * return raisexcpt();
6298 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
6299 */
6300 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6301
6302 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6303 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6304 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6305 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6306 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
6307
6308 iemNativeRegFreeTmp(pReNative, iTmpReg);
6309
6310#elif defined(RT_ARCH_ARM64)
6311 /*
6312 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6313 * return raisexcpt();
6314 * ----
6315 * mov x1, 0x800000000000
6316 * add x1, x0, x1
6317 * cmp xzr, x1, lsr 48
6318 * b.ne .Lraisexcpt
6319 */
6320 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6321
6322 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6323 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6324 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6325 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
6326
6327 iemNativeRegFreeTmp(pReNative, iTmpReg);
6328
6329#else
6330# error "Port me"
6331#endif
6332 return off;
6333}
6334
6335
6336/**
6337 * Emits code to check that the content of @a idxAddrReg is within the limit
6338 * of CS, raising a \#GP(0) if it isn't.
6339 *
6340 * @returns New code buffer offset; throws VBox status code on error.
6341 * @param pReNative The native recompile state.
6342 * @param off The code buffer offset.
6343 * @param idxAddrReg The host register (32-bit) with the address to
6344 * check.
6345 * @param idxInstr The current instruction.
6346 */
6347DECL_HIDDEN_THROW(uint32_t)
6348iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6349 uint8_t idxAddrReg, uint8_t idxInstr)
6350{
6351 /*
6352 * Make sure we don't have any outstanding guest register writes as we may
6354 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6354 */
6355 off = iemNativeRegFlushPendingWrites(pReNative, off);
6356
6357#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6358 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6359#else
6360 RT_NOREF(idxInstr);
6361#endif
6362
6363 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6364 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6365 kIemNativeGstRegUse_ReadOnly);
6366
6367 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6368 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
6369
6370 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6371 return off;
6372}
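
/*
 * The emitted check corresponds to this C sketch (unsigned 32-bit compare + ja/b.hi;
 * field name for illustration only):
 *
 *     if ((uint32_t)uAddr > pVCpu->cpum.GstCtx.cs.u32Limit)
 *         goto RaiseGp0;
 */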
6373
6374
6375/**
6376 * Emits a call to a CImpl function or something similar.
6377 */
6378DECL_HIDDEN_THROW(uint32_t)
6379iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6380 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6381{
6382 /* Writeback everything. */
6383 off = iemNativeRegFlushPendingWrites(pReNative, off);
6384
6385 /*
6386 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6387 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6388 */
6389 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6390 fGstShwFlush
6391 | RT_BIT_64(kIemNativeGstReg_Pc)
6392 | RT_BIT_64(kIemNativeGstReg_EFlags));
6393 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6394
6395 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6396
6397 /*
6398 * Load the parameters.
6399 */
6400#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6401 /* Special-case the hidden VBOXSTRICTRC return pointer. */
6402 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6403 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6404 if (cAddParams > 0)
6405 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6406 if (cAddParams > 1)
6407 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6408 if (cAddParams > 2)
6409 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6410 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6411
6412#else
6413 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6414 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6415 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6416 if (cAddParams > 0)
6417 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6418 if (cAddParams > 1)
6419 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6420 if (cAddParams > 2)
6421# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6422 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6423# else
6424 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6425# endif
6426#endif
6427
6428 /*
6429 * Make the call.
6430 */
6431 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6432
6433#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6434 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6435#endif
6436
6437 /*
6438 * Check the status code.
6439 */
6440 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6441}
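
/*
 * Conceptually the code generated above performs (sketch; the Windows/VBOXSTRICTRC
 * configuration additionally threads a hidden return-value pointer through the call):
 *
 *     rcStrict = pfnCImpl(pVCpu, cbInstr, uParam0, uParam1, uParam2);
 *     if ((rcStrict | pVCpu->iem.s.rcPassUp) != 0)
 *         goto NonZeroRetOrPassUp;
 */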
6442
6443
6444/**
6445 * Emits a call to a threaded worker function.
6446 */
6447DECL_HIDDEN_THROW(uint32_t)
6448iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6449{
6450 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6451
6452 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6453 off = iemNativeRegFlushPendingWrites(pReNative, off);
6454
6455 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6456 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6457
6458#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6459 /* The threaded function may throw / long jmp, so set current instruction
6460 number if we're counting. */
6461 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6462#endif
6463
6464 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6465
6466#ifdef RT_ARCH_AMD64
6467 /* Load the parameters and emit the call. */
6468# ifdef RT_OS_WINDOWS
6469# ifndef VBOXSTRICTRC_STRICT_ENABLED
6470 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6471 if (cParams > 0)
6472 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6473 if (cParams > 1)
6474 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6475 if (cParams > 2)
6476 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6477# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6478 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6479 if (cParams > 0)
6480 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6481 if (cParams > 1)
6482 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6483 if (cParams > 2)
6484 {
6485 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6486 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6487 }
6488 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6489# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6490# else
6491 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6492 if (cParams > 0)
6493 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6494 if (cParams > 1)
6495 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6496 if (cParams > 2)
6497 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6498# endif
6499
6500 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6501
6502# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6503 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6504# endif
6505
6506#elif RT_ARCH_ARM64
6507 /*
6508 * ARM64:
6509 */
6510 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6511 if (cParams > 0)
6512 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6513 if (cParams > 1)
6514 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6515 if (cParams > 2)
6516 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6517
6518 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6519
6520#else
6521# error "port me"
6522#endif
6523
6524 /*
6525 * Check the status code.
6526 */
6527 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6528
6529 return off;
6530}
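
/*
 * The net effect is equivalent to this C sketch, with unused trailing parameters
 * simply not being loaded:
 *
 *     rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
 *                    pCallEntry->auParams[0], pCallEntry->auParams[1], pCallEntry->auParams[2]);
 *     // followed by the shared status / rcPassUp check above
 */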
6531
6532#ifdef VBOX_WITH_STATISTICS
6533
6534/**
6535 * Emits code to update the thread call statistics.
6536 */
6537DECL_INLINE_THROW(uint32_t)
6538iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6539{
6540 /*
6541 * Update threaded function stats.
6542 */
6543 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6544 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6545# if defined(RT_ARCH_ARM64)
6546 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6547 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6548 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6549 iemNativeRegFreeTmp(pReNative, idxTmp1);
6550 iemNativeRegFreeTmp(pReNative, idxTmp2);
6551# else
6552 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6553# endif
6554 return off;
6555}
6556
6557
6558/**
6559 * Emits code to update the TB exit reason statistics.
6560 */
6561DECL_INLINE_THROW(uint32_t)
6562iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6563{
6564 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6565 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6566 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6567 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6568 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6569
6570 return off;
6571}
6572
6573#endif /* VBOX_WITH_STATISTICS */
6574
6575/**
6576 * Worker for iemNativeEmitViaLookupDoOne and iemNativeRecompileAttachExecMemChunkCtx.
6577 */
6578static uint32_t
6579iemNativeEmitCoreViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak, uintptr_t pfnHelper)
6580{
6581 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6582 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6583
6584 /* Jump to ReturnBreak if the return register is NULL. */
6585 off = iemNativeEmitTestIfGprIsZeroAndJmpToFixed(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6586 true /*f64Bit*/, offReturnBreak);
6587
6588 /* Okay, continue executing the next TB. */
6589 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6590 return off;
6591}
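
/*
 * C-level sketch of the emitted tail (illustration only):
 *
 *     pfnNextTb = pfnHelper(pVCpu);    // TB lookup helper, returns code pointer or NULL
 *     if (!pfnNextTb)
 *         goto ReturnBreak;            // offReturnBreak
 *     jump to pfnNextTb                // branch straight into the next TB
 */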
6592
6593#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6594
6595/**
6596 * Worker for iemNativeEmitReturnBreakViaLookup.
6597 */
6598static uint32_t iemNativeEmitViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak,
6599 IEMNATIVELABELTYPE enmLabel, uintptr_t pfnHelper)
6600{
6601 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
6602 if (idxLabel != UINT32_MAX)
6603 {
6604 iemNativeLabelDefine(pReNative, idxLabel, off);
6605 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, pfnHelper);
6606 }
6607 return off;
6608}
6609
6610
6611/**
6612 * Emits the code at the ReturnBreakViaLookup, ReturnBreakViaLookupWithIrq,
6613 * ReturnBreakViaLookupWithTlb and ReturnBreakViaLookupWithTlbAndIrq labels
6614 * (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS or jumps to the next TB).
6615 */
6616static uint32_t iemNativeEmitReturnBreakViaLookup(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnBreakLabel)
6617{
6618 uint32_t const offReturnBreak = pReNative->paLabels[idxReturnBreakLabel].off;
6619 Assert(offReturnBreak < off);
6620
6621 /*
6622 * The lookup table index is in IEMNATIVE_CALL_ARG1_GREG for all.
6623 * The GCPhysPc is in IEMNATIVE_CALL_ARG2_GREG for ReturnBreakViaLookupWithPc.
6624 */
6625 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookup,
6626 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/>);
6627 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
6628 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/>);
6629 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
6630 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/>);
6631 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
6632 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/>);
6633 return off;
6634}
6635
6636#endif /* !IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
6637
6638/**
6639 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6640 */
6641static uint32_t iemNativeEmitCoreReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6642{
6643 /* set the return status */
6644 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6645}
6646
6647
6648#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6649/**
6650 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6651 */
6652static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6653{
6654 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6655 if (idxLabel != UINT32_MAX)
6656 {
6657 iemNativeLabelDefine(pReNative, idxLabel, off);
6658 /* set the return status */
6659 off = iemNativeEmitCoreReturnWithFlags(pReNative, off);
6660 /* jump back to the return sequence. */
6661 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6662 }
6663 return off;
6664}
6665#endif
6666
6667
6668/**
6669 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6670 */
6671static uint32_t iemNativeEmitCoreReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6672{
6673 /* set the return status */
6674 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6675}
6676
6677
6678#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6679/**
6680 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6681 */
6682static uint32_t iemNativeEmitReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6683{
6684 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreakFF);
6685 if (idxLabel != UINT32_MAX)
6686 {
6687 iemNativeLabelDefine(pReNative, idxLabel, off);
6688 /* set the return status */
6689 off = iemNativeEmitCoreReturnBreakFF(pReNative, off);
6690 /* jump back to the return sequence. */
6691 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6692 }
6693 return off;
6694}
6695#endif
6696
6697
6698/**
6699 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6700 */
6701static uint32_t iemNativeEmitCoreReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6702{
6703 /* set the return status */
6704 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6705}
6706
6707
6708#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6709/**
6710 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6711 */
6712static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6713{
6714 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6715 if (idxLabel != UINT32_MAX)
6716 {
6717 iemNativeLabelDefine(pReNative, idxLabel, off);
6718 /* set the return status */
6719 off = iemNativeEmitCoreReturnBreak(pReNative, off);
6720 /* jump back to the return sequence. */
6721 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6722 }
6723 return off;
6724}
6725#endif
6726
6727
6728/**
6729 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6730 */
6731static uint32_t iemNativeEmitCoreRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6732{
6733 /*
6734 * Generate the rc + rcPassUp fiddling code.
6735 */
6736 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6737#ifdef RT_ARCH_AMD64
6738# ifdef RT_OS_WINDOWS
6739# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6740 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6741# endif
6742 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6743 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6744# else
6745 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6746 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6747# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6748 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6749# endif
6750# endif
6751# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6752 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6753# endif
6754
6755#else
6756 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6757 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6758 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6759#endif
6760
6761 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6762 return off;
6763}
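
/*
 * In effect this emits (sketch):
 *
 *     rc = iemNativeHlpExecStatusCodeFiddling(pVCpu, rc, idxInstr);
 *
 * where rc arrives in the return register (eax / w0) and idxInstr is only meaningful
 * when instruction counting is enabled; callers then continue to the common return path.
 */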
6764
6765
6766#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6767/**
6768 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6769 */
6770static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6771{
6772 /*
6773 * Generate the rc + rcPassUp fiddling code if needed.
6774 */
6775 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6776 if (idxLabel != UINT32_MAX)
6777 {
6778 iemNativeLabelDefine(pReNative, idxLabel, off);
6779 off = iemNativeEmitCoreRcFiddling(pReNative, off);
6780 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6781 }
6782 return off;
6783}
6784#endif
6785
6786
6787/**
6788 * Emits a standard epilog.
6789 */
6790static uint32_t iemNativeEmitCoreEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6791{
6792 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6793
6794 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6795
6796 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6797 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6798
6799 /*
6800 * Restore registers and return.
6801 */
6802#ifdef RT_ARCH_AMD64
6803 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6804
6805 /* Reposition rsp at the r15 restore point. */
6806 pbCodeBuf[off++] = X86_OP_REX_W;
6807 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6808 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6809 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6810
6811 /* Pop non-volatile registers and return */
6812 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6813 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6814 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6815 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6816 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6817 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6818 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6819 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6820# ifdef RT_OS_WINDOWS
6821 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6822 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6823# endif
6824 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6825 pbCodeBuf[off++] = 0xc9; /* leave */
6826 pbCodeBuf[off++] = 0xc3; /* ret */
6827 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6828
6829#elif RT_ARCH_ARM64
6830 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6831
6832 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6833 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6834 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6835 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6836 IEMNATIVE_FRAME_VAR_SIZE / 8);
6837 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6838 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6839 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6840 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6841 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6842 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6843 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6844 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6845 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6846 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6847 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6848 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6849
6850 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6851 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6852 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6853 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6854
6855 /* retab / ret */
6856# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6857 if (1)
6858 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6859 else
6860# endif
6861 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6862
6863#else
6864# error "port me"
6865#endif
6866 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6867
6868 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
6869 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6870
6871 return off;
6872}
6873
6874
6875#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6876/**
6877 * Emits a standard epilog.
6878 */
6879static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6880{
6881 /*
6882 * Define label for common return point.
6883 */
6884 *pidxReturnLabel = UINT32_MAX;
6885 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6886 *pidxReturnLabel = idxReturn;
6887
6888 /*
6889 * Emit the code.
6890 */
6891 return iemNativeEmitCoreEpilog(pReNative, off);
6892}
6893#endif
6894
6895
6896#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
6897/**
6898 * Emits a standard prolog.
6899 */
6900static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6901{
6902#ifdef RT_ARCH_AMD64
6903 /*
6904 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6905 * reserving 64 bytes for stack variables plus 4 non-register argument
6906 * slots. Fixed register assignment: xBX = pVCpu;
6907 *
6908 * Since we always do the same register spilling, we can use the same
6909 * unwind description for all the code.
6910 */
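    /* Illustrative sketch of the AMD64 frame set up below (exact constants
       depend on the IEMNATIVE_FRAME_* defines; the emitted bytes are the
       authoritative reference):
            push    rbp
            mov     rbp, rsp
            push    rbx                     ; rbx = pVCpu afterwards
            mov     rbx, rcx/rdi            ; Windows: rcx, SysV: rdi
            (push   rsi, rdi                ; Windows only)
            push    r12 / r13 / r14 / r15
            sub     rsp, align + var area + stack arg slots + shadow arg slots
       The ARM64 branch builds the equivalent frame with stp/sub. */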
6911 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6912 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6913 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6914 pbCodeBuf[off++] = 0x8b;
6915 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6916 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6917 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6918# ifdef RT_OS_WINDOWS
6919 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6920 pbCodeBuf[off++] = 0x8b;
6921 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6922 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6923 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6924# else
6925 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6926 pbCodeBuf[off++] = 0x8b;
6927 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6928# endif
6929 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6930 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6931 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6932 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6933 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6934 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6935 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6936 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6937
6938# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6939 /* Save the frame pointer. */
6940 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6941# endif
6942
6943 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6944 X86_GREG_xSP,
6945 IEMNATIVE_FRAME_ALIGN_SIZE
6946 + IEMNATIVE_FRAME_VAR_SIZE
6947 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6948 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6949 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6950 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6951 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6952
6953#elif RT_ARCH_ARM64
6954 /*
6955 * We set up a stack frame exactly like on x86, only we have to push the
6956 * return address our selves here. We save all non-volatile registers.
6957 * return address ourselves here. We save all non-volatile registers.
6958 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6959
6960# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've
6961 * been unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.
6962 * It's definitely the dwarf stepping code, but until that's found it's very tedious to figure out
6963 * whether it's in any way conditional, so just emit this instruction now and hope for the best... */
6964 /* pacibsp */
6965 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6966# endif
6967
6968 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6969 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6970 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6971 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6972 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6973 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6974 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6975 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6976 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6977 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6978 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6979 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6980 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6981 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6982 /* Save the BP and LR (ret address) registers at the top of the frame. */
6983 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6984 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6985 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6986 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6987 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6988 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6989
6990 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6991 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6992
6993 /* mov r28, r0 */
6994 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6995 /* mov r27, r1 */
6996 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6997
6998# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6999 /* Save the frame pointer. */
7000 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7001 ARMV8_A64_REG_X2);
7002# endif
7003
7004#else
7005# error "port me"
7006#endif
7007 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7008 return off;
7009}
7010#endif
7011
7012
7013/*********************************************************************************************************************************
7014* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7015*********************************************************************************************************************************/
7016
7017/**
7018 * Internal work that allocates a variable with kind set to
7019 * kIemNativeVarKind_Invalid and no current stack allocation.
7020 *
7021 * The kind will either be set by the caller or later when the variable is first
7022 * assigned a value.
7023 *
7024 * @returns Unpacked index.
7025 * @internal
7026 */
7027static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7028{
7029 Assert(cbType > 0 && cbType <= 64);
7030 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7031 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7032 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7033 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7034 pReNative->Core.aVars[idxVar].cbVar = cbType;
7035 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7036 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7037 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7038 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7039 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7040 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7041 pReNative->Core.aVars[idxVar].u.uValue = 0;
7042#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7043 pReNative->Core.aVars[idxVar].fSimdReg = false;
7044#endif
7045 return idxVar;
7046}
7047
7048
7049/**
7050 * Internal work that allocates an argument variable w/o setting enmKind.
7051 *
7052 * @returns Unpacked index.
7053 * @internal
7054 */
7055static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7056{
7057 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7058 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7059 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7060
7061 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7062 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7063 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7064 return idxVar;
7065}
7066
7067
7068/**
7069 * Gets the stack slot for a stack variable, allocating one if necessary.
7070 *
7071 * Calling this function implies that the stack slot will contain a valid
7072 * variable value. The caller deals with any register currently assigned to the
7073 * variable, typically by spilling it into the stack slot.
7074 *
7075 * @returns The stack slot number.
7076 * @param pReNative The recompiler state.
7077 * @param idxVar The variable.
7078 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7079 */
7080DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7081{
7082 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7083 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7084 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7085
7086 /* Already got a slot? */
7087 uint8_t const idxStackSlot = pVar->idxStackSlot;
7088 if (idxStackSlot != UINT8_MAX)
7089 {
7090 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7091 return idxStackSlot;
7092 }
7093
7094 /*
7095 * A single slot is easy to allocate.
7096 * Allocate them from the top end, closest to BP, to reduce the displacement.
7097 */
7098 if (pVar->cbVar <= sizeof(uint64_t))
7099 {
7100 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7101 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7102 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7103 pVar->idxStackSlot = (uint8_t)iSlot;
7104 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7105 return (uint8_t)iSlot;
7106 }
7107
7108 /*
7109 * We need more than one stack slot.
7110 *
7111 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7112 */
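    /* Worked example (illustrative): a 32 byte variable needs 4 slots, so
       fBitAlignMask = 3 and fBitAllocMask = 0xf. Starting from the highest
       free slot, the candidate index is rounded down to a 4-slot boundary and
       the 4-bit mask is tested against bmStack; on a clash those bits are set
       in the local copy and the search continues towards lower slots. */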
7113 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7114 Assert(pVar->cbVar <= 64);
7115 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7116 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7117 uint32_t bmStack = pReNative->Core.bmStack;
7118 while (bmStack != UINT32_MAX)
7119 {
7120 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7121 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7122 iSlot = (iSlot - 1) & ~fBitAlignMask;
7123 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7124 {
7125 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7126 pVar->idxStackSlot = (uint8_t)iSlot;
7127 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7128 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7129 return (uint8_t)iSlot;
7130 }
7131
7132 bmStack |= (fBitAllocMask << iSlot);
7133 }
7134 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7135}
7136
7137
7138/**
7139 * Changes the variable to a stack variable.
7140 *
7141 * Currently this is only possible to do the first time the variable is used;
7142 * switching later can be implemented but hasn't been done.
7143 *
7144 * @param pReNative The recompiler state.
7145 * @param idxVar The variable.
7146 * @throws VERR_IEM_VAR_IPE_2
7147 */
7148DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7149{
7150 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7151 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7152 if (pVar->enmKind != kIemNativeVarKind_Stack)
7153 {
7154 /* We could in theory transition from immediate to stack as well, but it
7155 would involve the caller doing work storing the value on the stack. So,
7156 till that's required we only allow transition from invalid. */
7157 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7158 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7159 pVar->enmKind = kIemNativeVarKind_Stack;
7160
7161 /* Note! We don't allocate a stack slot here, that's only done when a
7162 slot is actually needed to hold a variable value. */
7163 }
7164}
7165
7166
7167/**
7168 * Sets the variable to a constant value.
7169 *
7170 * This does not require stack storage as we know the value and can always
7171 * reload it, unless of course it's referenced.
7172 *
7173 * @param pReNative The recompiler state.
7174 * @param idxVar The variable.
7175 * @param uValue The immediate value.
7176 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7177 */
7178DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7179{
7180 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7181 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7182 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7183 {
7184 /* Only simple transitions for now. */
7185 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7186 pVar->enmKind = kIemNativeVarKind_Immediate;
7187 }
7188 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7189
7190 pVar->u.uValue = uValue;
7191 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7192 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7193 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7194}
7195
7196
7197/**
7198 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7199 *
7200 * This does not require stack storage as we know the value and can always
7201 * reload it. Loading is postponed till needed.
7202 *
7203 * @param pReNative The recompiler state.
7204 * @param idxVar The variable. Unpacked.
7205 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7206 *
7207 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7208 * @internal
7209 */
7210static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7211{
7212 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7213 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7214
7215 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7216 {
7217 /* Only simple transitions for now. */
7218 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7219 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7220 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7221 }
7222 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7223
7224 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7225
7226 /* Update the other variable, ensure it's a stack variable. */
7227 /** @todo handle variables with const values... that'll go boom now. */
7228 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7229 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7230}
7231
7232
7233/**
7234 * Sets the variable to a reference (pointer) to a guest register reference.
7235 *
7236 * This does not require stack storage as we know the value and can always
7237 * reload it. Loading is postponed till needed.
7238 *
7239 * @param pReNative The recompiler state.
7240 * @param idxVar The variable.
7241 * @param enmRegClass The class of guest registers to reference.
7242 * @param idxReg The register within @a enmRegClass to reference.
7243 *
7244 * @throws VERR_IEM_VAR_IPE_2
7245 */
7246DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7247 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7248{
7249 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7250 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7251
7252 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7253 {
7254 /* Only simple transitions for now. */
7255 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7256 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7257 }
7258 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7259
7260 pVar->u.GstRegRef.enmClass = enmRegClass;
7261 pVar->u.GstRegRef.idx = idxReg;
7262}
7263
7264
7265DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7266{
7267 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7268}
7269
7270
7271DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7272{
7273 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7274
7275    /* Since we're using a generic uint64_t value type, we must truncate it if
7276       the variable is smaller, otherwise we may end up with too large a value when
7277       scaling up an imm8 w/ sign-extension.
7278
7279       This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7280       in the BIOS, bx=1) when running on ARM, because clang expects 16-bit
7281       register parameters to have bits 16 and up set to zero. Instead of
7282       setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
7283       CF value in the result. */
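    /* Worked example (illustrative): cbType == sizeof(uint16_t) with a
       sign-extended uValue of UINT64_C(0xffffffffffffffff) is truncated to
       UINT64_C(0xffff) here, so the 16-bit argument register ends up with
       bits 16 and up cleared as the compiler expects. */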
7284 switch (cbType)
7285 {
7286 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7287 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7288 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7289 }
7290 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7291 return idxVar;
7292}
7293
7294
7295DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7296{
7297 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7298 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7299 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7300 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7301 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7302 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7303
7304 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7305 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7306 return idxArgVar;
7307}
7308
7309
7310DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7311{
7312 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7313 /* Don't set to stack now, leave that to the first use as for instance
7314 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7315 return idxVar;
7316}
7317
7318
7319DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7320{
7321 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7322
7323 /* Since we're using a generic uint64_t value type, we must truncate it if
7324       the variable is smaller, otherwise we may end up with too large a value when
7325       scaling up an imm8 w/ sign-extension. */
7326 switch (cbType)
7327 {
7328 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7329 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7330 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7331 }
7332 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7333 return idxVar;
7334}
7335
7336
7337DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
7338{
7339 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7340 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7341
7342 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
7343 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7344
7345 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7346
7347 /* Truncate the value to this variables size. */
7348 switch (cbType)
7349 {
7350 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7351 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7352 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7353 }
7354
7355 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7356 iemNativeVarRegisterRelease(pReNative, idxVar);
7357 return idxVar;
7358}
7359
7360
7361/**
7362 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7363 * fixed till we call iemNativeVarRegisterRelease.
7364 *
7365 * @returns The host register number.
7366 * @param pReNative The recompiler state.
7367 * @param idxVar The variable.
7368 * @param poff Pointer to the instruction buffer offset.
7369 * In case a register needs to be freed up or the value
7370 * loaded off the stack.
7371 * @param fInitialized Set if the variable must already have been initialized.
7372 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7373 * the case.
7374 * @param idxRegPref Preferred register number or UINT8_MAX.
7375 */
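/* Typical usage sketch (illustrative only; idxOtherReg is a made-up name):
       uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
       off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxOtherReg, idxVarReg);
       iemNativeVarRegisterRelease(pReNative, idxVar);
   The host register stays pinned to the variable until the release call. */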
7376DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7377 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7378{
7379 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7380 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7381 Assert(pVar->cbVar <= 8);
7382 Assert(!pVar->fRegAcquired);
7383
7384 uint8_t idxReg = pVar->idxReg;
7385 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7386 {
7387 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7388 && pVar->enmKind < kIemNativeVarKind_End);
7389 pVar->fRegAcquired = true;
7390 return idxReg;
7391 }
7392
7393 /*
7394 * If the kind of variable has not yet been set, default to 'stack'.
7395 */
7396 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7397 && pVar->enmKind < kIemNativeVarKind_End);
7398 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7399 iemNativeVarSetKindToStack(pReNative, idxVar);
7400
7401 /*
7402 * We have to allocate a register for the variable, even if it's a stack one,
7403 * as we don't know if there are modifications being made to it before it's
7404 * finalized (todo: analyze and insert hints about that?).
7405 *
7406 * If we can, we try to get the correct register for argument variables. This
7407 * assumes that most argument variables are fetched as close as possible
7408 * to the actual call, so that there aren't any interfering hidden calls
7409 * (memory accesses, etc.) in between.
7410 *
7411 * If we cannot, or it's not an argument variable, we make sure no argument
7412 * registers that will be used by this MC block are allocated here, and we
7413 * always prefer non-volatile registers to avoid needing to spill stuff for
7414 * internal calls.
7415 */
7416 /** @todo Detect too early argument value fetches and warn about hidden
7417 * calls causing less optimal code to be generated in the python script. */
7418
7419 uint8_t const uArgNo = pVar->uArgNo;
7420 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7421 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7422 {
7423 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7424
7425#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7426 /* Writeback any dirty shadow registers we are about to unshadow. */
7427 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7428#endif
7429
7430 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7431 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7432 }
7433 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7434 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7435 {
7436 /** @todo there must be a better way for this and boot cArgsX? */
7437 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7438 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7439 & ~pReNative->Core.bmHstRegsWithGstShadow
7440 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7441 & fNotArgsMask;
7442 if (fRegs)
7443 {
7444 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7445 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7446 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7447 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7448 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7449 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7450 }
7451 else
7452 {
7453 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7454 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7455 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7456 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7457 }
7458 }
7459 else
7460 {
7461 idxReg = idxRegPref;
7462 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7463 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7464 }
7465 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7466 pVar->idxReg = idxReg;
7467
7468#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7469 pVar->fSimdReg = false;
7470#endif
7471
7472 /*
7473 * Load it off the stack if we've got a stack slot.
7474 */
7475 uint8_t const idxStackSlot = pVar->idxStackSlot;
7476 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7477 {
7478 Assert(fInitialized);
7479 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7480 switch (pVar->cbVar)
7481 {
7482 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7483 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7484 case 3: AssertFailed(); RT_FALL_THRU();
7485 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7486 default: AssertFailed(); RT_FALL_THRU();
7487 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7488 }
7489 }
7490 else
7491 {
7492 Assert(idxStackSlot == UINT8_MAX);
7493 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7494 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7495 else
7496 {
7497 /*
7498 * Convert from immediate to stack/register. This is currently only
7499 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7500 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7501 */
7502 AssertStmt(fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7503 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7504 idxVar, idxReg, pVar->u.uValue));
7505 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7506 pVar->enmKind = kIemNativeVarKind_Stack;
7507 }
7508 }
7509
7510 pVar->fRegAcquired = true;
7511 return idxReg;
7512}
7513
7514
7515#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7516/**
7517 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7518 * fixed till we call iemNativeVarRegisterRelease.
7519 *
7520 * @returns The host register number.
7521 * @param pReNative The recompiler state.
7522 * @param idxVar The variable.
7523 * @param poff Pointer to the instruction buffer offset.
7524 * In case a register needs to be freed up or the value
7525 * loaded off the stack.
7526 * @param fInitialized Set if the variable must already have been initialized.
7527 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7528 * the case.
7529 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7530 */
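/* Usage mirrors iemNativeVarRegisterAcquire (illustrative sketch; assumes the
   variable is released via iemNativeVarRegisterRelease like a GPR variable):
       uint8_t const idxVarSimdReg = iemNativeVarSimdRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
       ... emit SIMD operations using idxVarSimdReg ...
       iemNativeVarRegisterRelease(pReNative, idxVar);
 */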
7531DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7532 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7533{
7534 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7535 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7536 Assert( pVar->cbVar == sizeof(RTUINT128U)
7537 || pVar->cbVar == sizeof(RTUINT256U));
7538 Assert(!pVar->fRegAcquired);
7539
7540 uint8_t idxReg = pVar->idxReg;
7541 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7542 {
7543 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7544 && pVar->enmKind < kIemNativeVarKind_End);
7545 pVar->fRegAcquired = true;
7546 return idxReg;
7547 }
7548
7549 /*
7550 * If the kind of variable has not yet been set, default to 'stack'.
7551 */
7552 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7553 && pVar->enmKind < kIemNativeVarKind_End);
7554 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7555 iemNativeVarSetKindToStack(pReNative, idxVar);
7556
7557 /*
7558 * We have to allocate a register for the variable, even if it's a stack one,
7559 * as we don't know if there are modifications being made to it before it's
7560 * finalized (todo: analyze and insert hints about that?).
7561 *
7562 * If we can, we try to get the correct register for argument variables. This
7563 * assumes that most argument variables are fetched as close as possible
7564 * to the actual call, so that there aren't any interfering hidden calls
7565 * (memory accesses, etc.) in between.
7566 *
7567 * If we cannot, or it's not an argument variable, we make sure no argument
7568 * registers that will be used by this MC block are allocated here, and we
7569 * always prefer non-volatile registers to avoid needing to spill stuff for
7570 * internal calls.
7571 */
7572 /** @todo Detect too early argument value fetches and warn about hidden
7573 * calls causing less optimal code to be generated in the python script. */
7574
7575 uint8_t const uArgNo = pVar->uArgNo;
7576 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7577
7578    /* SIMD is a bit simpler for now because there is no support for arguments. */
7579 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7580 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7581 {
7582 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7583 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7584 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7585 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7586 & fNotArgsMask;
7587 if (fRegs)
7588 {
7589 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7590 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7591 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7592 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7593 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7594 }
7595 else
7596 {
7597 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7598 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7599 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7600 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7601 }
7602 }
7603 else
7604 {
7605 idxReg = idxRegPref;
7606 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7607 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7608 }
7609 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7610
7611 pVar->fSimdReg = true;
7612 pVar->idxReg = idxReg;
7613
7614 /*
7615 * Load it off the stack if we've got a stack slot.
7616 */
7617 uint8_t const idxStackSlot = pVar->idxStackSlot;
7618 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7619 {
7620 Assert(fInitialized);
7621 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7622 switch (pVar->cbVar)
7623 {
7624 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7625 default: AssertFailed(); RT_FALL_THRU();
7626 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7627 }
7628 }
7629 else
7630 {
7631 Assert(idxStackSlot == UINT8_MAX);
7632 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7633 }
7634 pVar->fRegAcquired = true;
7635 return idxReg;
7636}
7637#endif
7638
7639
7640/**
7641 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7642 * guest register.
7643 *
7644 * This function makes sure there is a register for it and sets it to be the
7645 * current shadow copy of @a enmGstReg.
7646 *
7647 * @returns The host register number.
7648 * @param pReNative The recompiler state.
7649 * @param idxVar The variable.
7650 * @param enmGstReg The guest register this variable will be written to
7651 * after this call.
7652 * @param poff Pointer to the instruction buffer offset.
7653 * In case a register needs to be freed up or if the
7654 * variable content needs to be loaded off the stack.
7655 *
7656 * @note We DO NOT expect @a idxVar to be an argument variable,
7657 * because this function is only used in the commit stage of an
7658 * instruction.
7659 */
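/* Commit-stage usage sketch (illustrative only; idxVarValue and the guest
   register enum value are just example choices):
       uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarValue,
                                                                        kIemNativeGstReg_GprFirst, &off);
       ... idxVarReg now holds the variable value and shadows that guest GPR,
           so the subsequent full write needs no extra copying ...
       iemNativeVarRegisterRelease(pReNative, idxVarValue);
 */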
7660DECL_HIDDEN_THROW(uint8_t)
7661iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7662{
7663 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7664 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7665 Assert(!pVar->fRegAcquired);
7666 AssertMsgStmt( pVar->cbVar <= 8
7667 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7668 || pVar->enmKind == kIemNativeVarKind_Stack),
7669 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7670 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7671 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7672
7673 /*
7674 * This shouldn't ever be used for arguments, unless it's in a weird else
7675 * branch that doesn't do any calling and even then it's questionable.
7676 *
7677 * However, in case someone writes crazy wrong MC code and does register
7678 * updates before making calls, just use the regular register allocator to
7679 * ensure we get a register suitable for the intended argument number.
7680 */
7681 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7682
7683 /*
7684 * If there is already a register for the variable, we transfer/set the
7685 * guest shadow copy assignment to it.
7686 */
7687 uint8_t idxReg = pVar->idxReg;
7688 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7689 {
7690#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7691 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7692 {
7693# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7694 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7695 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7696# endif
7697 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7698 }
7699#endif
7700
7701 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7702 {
7703 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7704 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7705 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7706 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7707 }
7708 else
7709 {
7710 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7711 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7712 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7713 }
7714 /** @todo figure this one out. We need some way of making sure the register isn't
7715 * modified after this point, just in case we start writing crappy MC code. */
7716 pVar->enmGstReg = enmGstReg;
7717 pVar->fRegAcquired = true;
7718 return idxReg;
7719 }
7720 Assert(pVar->uArgNo == UINT8_MAX);
7721
7722 /*
7723 * Because this is supposed to be the commit stage, we just tag along with the
7724 * temporary register allocator and upgrade it to a variable register.
7725 */
7726 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7727 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7728 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7729 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7730 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7731 pVar->idxReg = idxReg;
7732
7733 /*
7734 * Now we need to load the register value.
7735 */
7736 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7737 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7738 else
7739 {
7740 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7741 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7742 switch (pVar->cbVar)
7743 {
7744 case sizeof(uint64_t):
7745 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7746 break;
7747 case sizeof(uint32_t):
7748 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7749 break;
7750 case sizeof(uint16_t):
7751 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7752 break;
7753 case sizeof(uint8_t):
7754 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7755 break;
7756 default:
7757 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7758 }
7759 }
7760
7761 pVar->fRegAcquired = true;
7762 return idxReg;
7763}
7764
7765
7766/**
7767 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7768 *
7769 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7770 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7771 * requirement of flushing anything in volatile host registers when making a
7772 * call.
7773 *
7774 * @returns New @a off value.
7775 * @param pReNative The recompiler state.
7776 * @param off The code buffer position.
7777 * @param fHstRegsNotToSave Set of registers not to save & restore.
7778 */
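/* Pairing sketch around a TLB-miss helper call (illustrative only):
       off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
       ... load helper arguments and emit the call ...
       off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
   optionally followed by iemNativeRegRestoreGuestShadowsInVolatileRegs(). */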
7779DECL_HIDDEN_THROW(uint32_t)
7780iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7781{
7782 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7783 if (fHstRegs)
7784 {
7785 do
7786 {
7787 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7788 fHstRegs &= ~RT_BIT_32(idxHstReg);
7789
7790 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7791 {
7792 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7793 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7794 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7795 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7796 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7797 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7798 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7799 {
7800 case kIemNativeVarKind_Stack:
7801 {
7802 /* Temporarily spill the variable register. */
7803 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7804 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7805 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7806 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7807 continue;
7808 }
7809
7810 case kIemNativeVarKind_Immediate:
7811 case kIemNativeVarKind_VarRef:
7812 case kIemNativeVarKind_GstRegRef:
7813 /* It is weird to have any of these loaded at this point. */
7814 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7815 continue;
7816
7817 case kIemNativeVarKind_End:
7818 case kIemNativeVarKind_Invalid:
7819 break;
7820 }
7821 AssertFailed();
7822 }
7823 else
7824 {
7825 /*
7826 * Allocate a temporary stack slot and spill the register to it.
7827 */
7828 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7829 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7830 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7831 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7832 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7833 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7834 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7835 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7836 }
7837 } while (fHstRegs);
7838 }
7839#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7840
7841 /*
7842 * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
7843 * which would be more difficult anyway due to them spanning multiple stack slots and having different sizes
7844 * (besides, we only have a limited number of slots at the moment).
7845 *
7846 * However the shadows need to be flushed out as the guest SIMD register might get corrupted by
7847 * the callee. This asserts that the registers were written back earlier and are not in the dirty state.
7848 */
7849 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7850
7851 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7852 if (fHstRegs)
7853 {
7854 do
7855 {
7856 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7857 fHstRegs &= ~RT_BIT_32(idxHstReg);
7858
7859 /* Fixed reserved and temporary registers don't need saving. */
7860 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7861 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7862 continue;
7863
7864 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7865
7866 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7867 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7868 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7869 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7870 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7871 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7872 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7873 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7874 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7875 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7876 {
7877 case kIemNativeVarKind_Stack:
7878 {
7879 /* Temporarily spill the variable register. */
7880 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7881 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7882 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7883 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7884 if (cbVar == sizeof(RTUINT128U))
7885 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7886 else
7887 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7888 continue;
7889 }
7890
7891 case kIemNativeVarKind_Immediate:
7892 case kIemNativeVarKind_VarRef:
7893 case kIemNativeVarKind_GstRegRef:
7894 /* It is weird to have any of these loaded at this point. */
7895 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7896 continue;
7897
7898 case kIemNativeVarKind_End:
7899 case kIemNativeVarKind_Invalid:
7900 break;
7901 }
7902 AssertFailed();
7903 } while (fHstRegs);
7904 }
7905#endif
7906 return off;
7907}
7908
7909
7910/**
7911 * Emit code to restore volatile registers after a call to a helper.
7912 *
7913 * @returns New @a off value.
7914 * @param pReNative The recompiler state.
7915 * @param off The code buffer position.
7916 * @param fHstRegsNotToSave Set of registers not to save & restore.
7917 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7918 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7919 */
7920DECL_HIDDEN_THROW(uint32_t)
7921iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7922{
7923 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7924 if (fHstRegs)
7925 {
7926 do
7927 {
7928 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7929 fHstRegs &= ~RT_BIT_32(idxHstReg);
7930
7931 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7932 {
7933 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7934 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7935 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7936 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7937 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7938 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7939 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7940 {
7941 case kIemNativeVarKind_Stack:
7942 {
7943 /* Unspill the variable register. */
7944 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7945 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7946 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7947 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7948 continue;
7949 }
7950
7951 case kIemNativeVarKind_Immediate:
7952 case kIemNativeVarKind_VarRef:
7953 case kIemNativeVarKind_GstRegRef:
7954 /* It is weird to have any of these loaded at this point. */
7955 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7956 continue;
7957
7958 case kIemNativeVarKind_End:
7959 case kIemNativeVarKind_Invalid:
7960 break;
7961 }
7962 AssertFailed();
7963 }
7964 else
7965 {
7966 /*
7967 * Restore from temporary stack slot.
7968 */
7969 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7970 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7971 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7972 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7973
7974 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7975 }
7976 } while (fHstRegs);
7977 }
7978#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7979 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7980 if (fHstRegs)
7981 {
7982 do
7983 {
7984 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7985 fHstRegs &= ~RT_BIT_32(idxHstReg);
7986
7987 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7988 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7989 continue;
7990 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7991
7992 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7993 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7994 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7995 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7996 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7997 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7998 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7999 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8000 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8001 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8002 {
8003 case kIemNativeVarKind_Stack:
8004 {
8005 /* Unspill the variable register. */
8006 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8007 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8008 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8009 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8010
8011 if (cbVar == sizeof(RTUINT128U))
8012 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8013 else
8014 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8015 continue;
8016 }
8017
8018 case kIemNativeVarKind_Immediate:
8019 case kIemNativeVarKind_VarRef:
8020 case kIemNativeVarKind_GstRegRef:
8021 /* It is weird to have any of these loaded at this point. */
8022 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8023 continue;
8024
8025 case kIemNativeVarKind_End:
8026 case kIemNativeVarKind_Invalid:
8027 break;
8028 }
8029 AssertFailed();
8030 } while (fHstRegs);
8031 }
8032#endif
8033 return off;
8034}
8035
8036
8037/**
8038 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8039 *
8040 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8041 *
8042 * ASSUMES that @a idxVar is valid and unpacked.
8043 */
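/* E.g. (illustrative): cbVar == 32 gives cSlots == 4 and fAllocMask == 0xf,
   which gets shifted by idxStackSlot and cleared from Core.bmStack below. */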
8044DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8045{
8046 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8047 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8048 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8049 {
8050 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8051 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8052 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8053 Assert(cSlots > 0);
8054 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8055 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8056 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8057 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8058 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8059 }
8060 else
8061 Assert(idxStackSlot == UINT8_MAX);
8062}
8063
8064
8065/**
8066 * Worker that frees a single variable.
8067 *
8068 * ASSUMES that @a idxVar is valid and unpacked.
8069 */
8070DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8071{
8072 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8073 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8074 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8075
8076 /* Free the host register first if any assigned. */
8077 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8078#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8079 if ( idxHstReg != UINT8_MAX
8080 && pReNative->Core.aVars[idxVar].fSimdReg)
8081 {
8082 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8083 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8084 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8085 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8086 }
8087 else
8088#endif
8089 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8090 {
8091 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8092 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8093 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8094 }
8095
8096 /* Free argument mapping. */
8097 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8098 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8099 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8100
8101 /* Free the stack slots. */
8102 iemNativeVarFreeStackSlots(pReNative, idxVar);
8103
8104 /* Free the actual variable. */
8105 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8106 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8107}
8108
8109
8110/**
8111 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8112 */
8113DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8114{
8115 while (bmVars != 0)
8116 {
8117 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8118 bmVars &= ~RT_BIT_32(idxVar);
8119
8120#if 1 /** @todo optimize by simplifying this later... */
8121 iemNativeVarFreeOneWorker(pReNative, idxVar);
8122#else
8123 /* Only need to free the host register, the rest is done as bulk updates below. */
8124 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8125 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8126 {
8127 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8128 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8129 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8130 }
8131#endif
8132 }
8133#if 0 /** @todo optimize by simplifying this later... */
8134 pReNative->Core.bmVars = 0;
8135 pReNative->Core.bmStack = 0;
8136 pReNative->Core.u64ArgVars = UINT64_MAX;
8137#endif
8138}
8139
8140
8141
8142/*********************************************************************************************************************************
8143* Emitters for IEM_MC_CALL_CIMPL_XXX *
8144*********************************************************************************************************************************/
8145
8146/**
8147 * Emits code to load a reference to the given guest register into @a idxGprDst.
8148 */
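/* Example (illustrative): enmClass == kIemNativeGstRegRef_Gpr with
   idxRegInClass == 3 loads the address of pVCpu->cpum.GstCtx.aGRegs[3],
   i.e. the guest RBX, into idxGprDst. */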
8149DECL_HIDDEN_THROW(uint32_t)
8150iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8151 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8152{
8153#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8154 /** @todo If we ever gonna allow referencing the RIP register we need to update guest value here. */
8155#endif
8156
8157 /*
8158 * Get the offset relative to the CPUMCTX structure.
8159 */
8160 uint32_t offCpumCtx;
8161 switch (enmClass)
8162 {
8163 case kIemNativeGstRegRef_Gpr:
8164 Assert(idxRegInClass < 16);
8165 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8166 break;
8167
8168 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
8169 Assert(idxRegInClass < 4);
8170 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8171 break;
8172
8173 case kIemNativeGstRegRef_EFlags:
8174 Assert(idxRegInClass == 0);
8175 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8176 break;
8177
8178 case kIemNativeGstRegRef_MxCsr:
8179 Assert(idxRegInClass == 0);
8180 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8181 break;
8182
8183 case kIemNativeGstRegRef_FpuReg:
8184 Assert(idxRegInClass < 8);
8185 AssertFailed(); /** @todo what kind of indexing? */
8186 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8187 break;
8188
8189 case kIemNativeGstRegRef_MReg:
8190 Assert(idxRegInClass < 8);
8191 AssertFailed(); /** @todo what kind of indexing? */
8192 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8193 break;
8194
8195 case kIemNativeGstRegRef_XReg:
8196 Assert(idxRegInClass < 16);
8197 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8198 break;
8199
8200 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8201 Assert(idxRegInClass == 0);
8202 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8203 break;
8204
8205 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8206 Assert(idxRegInClass == 0);
8207 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8208 break;
8209
8210 default:
8211 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8212 }
8213
8214 /*
8215 * Load the value into the destination register.
8216 */
8217#ifdef RT_ARCH_AMD64
8218 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8219
8220#elif defined(RT_ARCH_ARM64)
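    /* On ARM64 the reference is simply PCPUMCTX plus the member offset; the
       Assert below ensures the offset fits the 12-bit unsigned ADD immediate. */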
8221 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8222 Assert(offCpumCtx < 4096);
8223 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8224
8225#else
8226# error "Port me!"
8227#endif
8228
8229 return off;
8230}
8231
8232
8233/**
8234 * Common code for CIMPL and AIMPL calls.
8235 *
8236 * These are calls that use argument variables and such. They should not be
8237 * confused with internal calls required to implement an MC operation,
8238 * like a TLB load and similar.
8239 *
8240 * Upon return all that is left to do is to load any hidden arguments and
8241 * perform the call. All argument variables are freed.
8242 *
8243 * @returns New code buffer offset; throws VBox status code on error.
8244 * @param pReNative The native recompile state.
8245 * @param off The code buffer offset.
8246 * @param cArgs The total number of arguments (includes hidden
8247 * count).
8248 * @param cHiddenArgs The number of hidden arguments. The hidden
8249 * arguments must not have any variable declared for
8250 * them, whereas all the regular arguments must
8251 * (tstIEMCheckMc ensures this).
8252 * @param fFlushPendingWrites Whether to flush pending writes (default true).
8253 * Even when false, pending writes in call volatile registers are still flushed.
8254 */
8255DECL_HIDDEN_THROW(uint32_t)
8256iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8257 bool fFlushPendingWrites /*= true*/)
8258{
8259#ifdef VBOX_STRICT
8260 /*
8261 * Assert sanity.
8262 */
8263 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8264 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8265 for (unsigned i = 0; i < cHiddenArgs; i++)
8266 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8267 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8268 {
8269 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8270 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8271 }
8272 iemNativeRegAssertSanity(pReNative);
8273#endif
8274
8275 /* We don't know what the called function makes use of, so flush any pending register writes. */
8276 RT_NOREF(fFlushPendingWrites);
8277#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8278 if (fFlushPendingWrites)
8279#endif
8280 off = iemNativeRegFlushPendingWrites(pReNative, off);
8281
8282 /*
8283 * Before we do anything else, go over variables that are referenced and
8284 * make sure they are not in a register.
8285 */
8286 uint32_t bmVars = pReNative->Core.bmVars;
8287 if (bmVars)
8288 {
8289 do
8290 {
8291 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8292 bmVars &= ~RT_BIT_32(idxVar);
8293
8294 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8295 {
8296 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8297#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8298 if ( idxRegOld != UINT8_MAX
8299 && pReNative->Core.aVars[idxVar].fSimdReg)
8300 {
8301 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8302 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8303
8304 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8305 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8306 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8307 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8308 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8309 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8310 else
8311 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8312
8313 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8314 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8315
8316 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8317 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8318 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8319 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8320 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8321 }
8322 else
8323#endif
8324 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8325 {
8326 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8327 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8328 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8329 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8330 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8331
8332 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8333 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8334 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8335 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8336 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8337 }
8338 }
8339 } while (bmVars != 0);
8340#if 0 //def VBOX_STRICT
8341 iemNativeRegAssertSanity(pReNative);
8342#endif
8343 }
8344
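    /* Number of arguments passed in host registers; with IEMNATIVE_FP_OFF_STACK_ARG0
       configurations any remaining arguments are passed on the stack (see below). */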
8345 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8346
8347#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8348 /*
8349 * As the very first step, go over the host registers that will be used for arguments
8350 * and make sure they don't shadow anything which needs writing back first.
8351 */
8352 for (uint32_t i = 0; i < cRegArgs; i++)
8353 {
8354 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8355
8356 /* Writeback any dirty guest shadows before using this register. */
8357 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8358 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8359 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8360 }
8361#endif
8362
8363 /*
8364 * First, go over the host registers that will be used for arguments and make
8365 * sure they either hold the desired argument or are free.
8366 */
8367 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8368 {
8369 for (uint32_t i = 0; i < cRegArgs; i++)
8370 {
8371 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8372 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8373 {
8374 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8375 {
8376 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8377 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8378 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8379 Assert(pVar->idxReg == idxArgReg);
8380 uint8_t const uArgNo = pVar->uArgNo;
8381 if (uArgNo == i)
8382 { /* perfect */ }
8383 /* The variable allocator logic should make sure this is impossible,
8384 except for when the return register is used as a parameter (ARM,
8385 but not x86). */
8386#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8387 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8388 {
8389# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8390# error "Implement this"
8391# endif
8392 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8393 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8394 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8395 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8396 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8397 }
8398#endif
8399 else
8400 {
8401 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8402
8403 if (pVar->enmKind == kIemNativeVarKind_Stack)
8404 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8405 else
8406 {
8407 /* just free it, can be reloaded if used again */
8408 pVar->idxReg = UINT8_MAX;
8409 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8410 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8411 }
8412 }
8413 }
8414 else
8415 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8416 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8417 }
8418 }
8419#if 0 //def VBOX_STRICT
8420 iemNativeRegAssertSanity(pReNative);
8421#endif
8422 }
8423
8424 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8425
8426#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8427 /*
8428 * If there are any stack arguments, make sure they are in their place as well.
8429 *
8430 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8431 * the caller) will be loading it later and it must be free (see the first loop).
8432 */
8433 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8434 {
8435 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8436 {
8437 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8438 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8439 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8440 {
8441 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8442 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8443 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8444 pVar->idxReg = UINT8_MAX;
8445 }
8446 else
8447 {
8448 /* Use ARG0 as temp for stuff we need registers for. */
8449 switch (pVar->enmKind)
8450 {
8451 case kIemNativeVarKind_Stack:
8452 {
8453 uint8_t const idxStackSlot = pVar->idxStackSlot;
8454 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8455 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8456 iemNativeStackCalcBpDisp(idxStackSlot));
8457 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8458 continue;
8459 }
8460
8461 case kIemNativeVarKind_Immediate:
8462 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8463 continue;
8464
8465 case kIemNativeVarKind_VarRef:
8466 {
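                        /* Make sure the referenced variable lives in its stack slot (spilling any
                           host register copy), then pass the address of that slot as the argument. */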
8467 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8468 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8469 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8470 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8471 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8472# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8473 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8474 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8475 if ( fSimdReg
8476 && idxRegOther != UINT8_MAX)
8477 {
8478 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8479 if (cbVar == sizeof(RTUINT128U))
8480 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8481 else
8482 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8483 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8484 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8485 }
8486 else
8487# endif
8488 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8489 {
8490 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8491 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8492 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8493 }
8494 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8495 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8496 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8497 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8498 continue;
8499 }
8500
8501 case kIemNativeVarKind_GstRegRef:
8502 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8503 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8504 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8505 continue;
8506
8507 case kIemNativeVarKind_Invalid:
8508 case kIemNativeVarKind_End:
8509 break;
8510 }
8511 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8512 }
8513 }
8514# if 0 //def VBOX_STRICT
8515 iemNativeRegAssertSanity(pReNative);
8516# endif
8517 }
8518#else
8519 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8520#endif
8521
8522 /*
8523 * Make sure the argument variables are loaded into their respective registers.
8524 *
8525 * We can optimize this by ASSUMING that any register allocations are for
8526 * registers that have already been loaded and are ready. The previous step
8527 * saw to that.
8528 */
8529 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8530 {
8531 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8532 {
8533 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8534 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8535 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8536 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8537 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8538 else
8539 {
8540 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8541 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8542 {
8543 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8544 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8545 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8546 | RT_BIT_32(idxArgReg);
8547 pVar->idxReg = idxArgReg;
8548 }
8549 else
8550 {
8551 /* Use ARG0 as temp for stuff we need registers for. */
8552 switch (pVar->enmKind)
8553 {
8554 case kIemNativeVarKind_Stack:
8555 {
8556 uint8_t const idxStackSlot = pVar->idxStackSlot;
8557 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8558 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8559 continue;
8560 }
8561
8562 case kIemNativeVarKind_Immediate:
8563 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8564 continue;
8565
8566 case kIemNativeVarKind_VarRef:
8567 {
8568 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8569 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8570 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8571 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8572 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8573 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8574#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8575 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8576 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8577 if ( fSimdReg
8578 && idxRegOther != UINT8_MAX)
8579 {
8580 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8581 if (cbVar == sizeof(RTUINT128U))
8582 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8583 else
8584 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8585 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8586 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8587 }
8588 else
8589#endif
8590 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8591 {
8592 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8593 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8594 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8595 }
8596 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8597 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8598 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8599 continue;
8600 }
8601
8602 case kIemNativeVarKind_GstRegRef:
8603 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8604 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8605 continue;
8606
8607 case kIemNativeVarKind_Invalid:
8608 case kIemNativeVarKind_End:
8609 break;
8610 }
8611 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8612 }
8613 }
8614 }
8615#if 0 //def VBOX_STRICT
8616 iemNativeRegAssertSanity(pReNative);
8617#endif
8618 }
8619#ifdef VBOX_STRICT
8620 else
8621 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8622 {
8623 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8624 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8625 }
8626#endif
8627
8628 /*
8629 * Free all argument variables (simplified).
8630 * Their lifetime always expires with the call they are for.
8631 */
8632 /** @todo Make the python script check that arguments aren't used after
8633 * IEM_MC_CALL_XXXX. */
8634 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8635 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8636 * an argument value. There is also some FPU stuff. */
8637 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8638 {
8639 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8640 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8641
8642 /* no need to free registers: */
8643 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8644 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8645 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8646 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8647 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8648 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8649
8650 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8651 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8652 iemNativeVarFreeStackSlots(pReNative, idxVar);
8653 }
8654 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8655
8656 /*
8657 * Flush volatile registers as we make the call.
8658 */
8659 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8660
8661 return off;
8662}
8663
8664
8665
8666/*********************************************************************************************************************************
8667* TLB Lookup. *
8668*********************************************************************************************************************************/
8669
8670/**
8671 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8672 */
8673DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint64_t uSegAndSizeAndAccessAndDisp)
8674{
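    /*
     * Decode the packed parameters.  Judging from the extraction below, the caller
     * packs them roughly like this (sketch, not authoritative):
     *     iSegReg in byte 0, cbMem in byte 1, fAccess in bytes 2-3 and the
     *     displacement in byte 4, i.e.
     *     u64 = iSegReg | (cbMem << 8) | (fAccess << 16) | ((uint64_t)offDisp << 32).
     */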
8675 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccessAndDisp);
8676 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccessAndDisp);
8677 uint32_t const fAccess = (uint32_t)uSegAndSizeAndAccessAndDisp >> 16;
8678 uint8_t const offDisp = RT_BYTE5(uSegAndSizeAndAccessAndDisp);
8679 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64+%#x LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, offDisp, cbMem, fAccess, uResult));
8680
8681 /* Do the lookup manually. */
8682 RTGCPTR const GCPtrFlat = (iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base) + offDisp;
8683 uint64_t const uTagNoRev = IEMTLB_CALC_TAG_NO_REV(GCPtrFlat);
8684 PCIEMTLBENTRY pTlbe = IEMTLB_TAG_TO_EVEN_ENTRY(&pVCpu->iem.s.DataTlb, uTagNoRev);
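    /* Each tag maps to an even/odd entry pair: the even entry is matched against the
       normal TLB revision, the odd one against the global-page revision. */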
8685 if (RT_LIKELY( pTlbe->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevision)
8686 || (pTlbe = pTlbe + 1)->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevisionGlobal)))
8687 {
8688 /*
8689 * Check TLB page table level access flags.
8690 */
8691 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
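        /* (CPL + 1) & 4: CPL 3 yields 4 (IEMTLBE_F_PT_NO_USER), so supervisor-only
           pages fail the check below for user-mode code; CPL 0..2 yields 0. */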
8692 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8693 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8694 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8695 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8696 | IEMTLBE_F_PG_UNASSIGNED
8697 | IEMTLBE_F_PT_NO_ACCESSED
8698 | fNoWriteNoDirty | fNoUser);
8699 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8700 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8701 {
8702 /*
8703 * Return the address.
8704 */
8705 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8706 if ((uintptr_t)pbAddr == uResult)
8707 return;
8708 RT_NOREF(cbMem);
8709 AssertFailed();
8710 }
8711 else
8712 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8713 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8714 }
8715 else
8716 AssertFailed();
8717 RT_BREAKPOINT();
8718}
8719
8720/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8721
8722
8723
8724/*********************************************************************************************************************************
8725* Recompiler Core. *
8726*********************************************************************************************************************************/
8727
8728/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8729static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8730{
8731 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8732 pDis->cbCachedInstr += cbMaxRead;
8733 RT_NOREF(cbMinRead);
8734 return VERR_NO_DATA;
8735}
8736
8737
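/**
 * Translates a byte offset into VMCPUCC to the name of the member at that
 * offset (best effort), for annotating disassembled memory accesses.
 *
 * Returns NULL if the offset isn't recognized.
 */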
8738DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8739{
8740 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8741 {
8742#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8743 ENTRY(fLocalForcedActions),
8744 ENTRY(iem.s.rcPassUp),
8745 ENTRY(iem.s.fExec),
8746 ENTRY(iem.s.pbInstrBuf),
8747 ENTRY(iem.s.uInstrBufPc),
8748 ENTRY(iem.s.GCPhysInstrBuf),
8749 ENTRY(iem.s.cbInstrBufTotal),
8750 ENTRY(iem.s.idxTbCurInstr),
8751 ENTRY(iem.s.fSkippingEFlags),
8752#ifdef VBOX_WITH_STATISTICS
8753 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8754 ENTRY(iem.s.StatNativeTlbHitsForStore),
8755 ENTRY(iem.s.StatNativeTlbHitsForStack),
8756 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8757 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8758 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8759 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8760 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8761#endif
8762 ENTRY(iem.s.DataTlb.uTlbRevision),
8763 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8764 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8765 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8766 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8767 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8768 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8769 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8770 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8771 ENTRY(iem.s.DataTlb.aEntries),
8772 ENTRY(iem.s.CodeTlb.uTlbRevision),
8773 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8774 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8775 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8776 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8777 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8778 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8779 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8780 ENTRY(iem.s.CodeTlb.aEntries),
8781 ENTRY(pVMR3),
8782 ENTRY(cpum.GstCtx.rax),
8783 ENTRY(cpum.GstCtx.ah),
8784 ENTRY(cpum.GstCtx.rcx),
8785 ENTRY(cpum.GstCtx.ch),
8786 ENTRY(cpum.GstCtx.rdx),
8787 ENTRY(cpum.GstCtx.dh),
8788 ENTRY(cpum.GstCtx.rbx),
8789 ENTRY(cpum.GstCtx.bh),
8790 ENTRY(cpum.GstCtx.rsp),
8791 ENTRY(cpum.GstCtx.rbp),
8792 ENTRY(cpum.GstCtx.rsi),
8793 ENTRY(cpum.GstCtx.rdi),
8794 ENTRY(cpum.GstCtx.r8),
8795 ENTRY(cpum.GstCtx.r9),
8796 ENTRY(cpum.GstCtx.r10),
8797 ENTRY(cpum.GstCtx.r11),
8798 ENTRY(cpum.GstCtx.r12),
8799 ENTRY(cpum.GstCtx.r13),
8800 ENTRY(cpum.GstCtx.r14),
8801 ENTRY(cpum.GstCtx.r15),
8802 ENTRY(cpum.GstCtx.es.Sel),
8803 ENTRY(cpum.GstCtx.es.u64Base),
8804 ENTRY(cpum.GstCtx.es.u32Limit),
8805 ENTRY(cpum.GstCtx.es.Attr),
8806 ENTRY(cpum.GstCtx.cs.Sel),
8807 ENTRY(cpum.GstCtx.cs.u64Base),
8808 ENTRY(cpum.GstCtx.cs.u32Limit),
8809 ENTRY(cpum.GstCtx.cs.Attr),
8810 ENTRY(cpum.GstCtx.ss.Sel),
8811 ENTRY(cpum.GstCtx.ss.u64Base),
8812 ENTRY(cpum.GstCtx.ss.u32Limit),
8813 ENTRY(cpum.GstCtx.ss.Attr),
8814 ENTRY(cpum.GstCtx.ds.Sel),
8815 ENTRY(cpum.GstCtx.ds.u64Base),
8816 ENTRY(cpum.GstCtx.ds.u32Limit),
8817 ENTRY(cpum.GstCtx.ds.Attr),
8818 ENTRY(cpum.GstCtx.fs.Sel),
8819 ENTRY(cpum.GstCtx.fs.u64Base),
8820 ENTRY(cpum.GstCtx.fs.u32Limit),
8821 ENTRY(cpum.GstCtx.fs.Attr),
8822 ENTRY(cpum.GstCtx.gs.Sel),
8823 ENTRY(cpum.GstCtx.gs.u64Base),
8824 ENTRY(cpum.GstCtx.gs.u32Limit),
8825 ENTRY(cpum.GstCtx.gs.Attr),
8826 ENTRY(cpum.GstCtx.rip),
8827 ENTRY(cpum.GstCtx.eflags),
8828 ENTRY(cpum.GstCtx.uRipInhibitInt),
8829 ENTRY(cpum.GstCtx.cr0),
8830 ENTRY(cpum.GstCtx.cr4),
8831 ENTRY(cpum.GstCtx.aXcr[0]),
8832 ENTRY(cpum.GstCtx.aXcr[1]),
8833#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8834 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8835 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8836 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8837 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8838 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8839 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8840 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8841 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8842 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8843 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8844 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8845 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8846 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8847 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8848 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8849 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8850 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8851 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8852 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8853 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8854 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8855 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8856 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8857 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8858 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8859 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8860 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8861 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8862 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8863 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8864 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8865 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8866 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8867#endif
8868#undef ENTRY
8869 };
8870#ifdef VBOX_STRICT
8871 static bool s_fOrderChecked = false;
8872 if (!s_fOrderChecked)
8873 {
8874 s_fOrderChecked = true;
8875 uint32_t offPrev = s_aMembers[0].off;
8876 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8877 {
8878 Assert(s_aMembers[i].off > offPrev);
8879 offPrev = s_aMembers[i].off;
8880 }
8881 }
8882#endif
8883
8884 /*
8885 * Binary lookup.
8886 */
8887 unsigned iStart = 0;
8888 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8889 for (;;)
8890 {
8891 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8892 uint32_t const offCur = s_aMembers[iCur].off;
8893 if (off < offCur)
8894 {
8895 if (iCur != iStart)
8896 iEnd = iCur;
8897 else
8898 break;
8899 }
8900 else if (off > offCur)
8901 {
8902 if (iCur + 1 < iEnd)
8903 iStart = iCur + 1;
8904 else
8905 break;
8906 }
8907 else
8908 return s_aMembers[iCur].pszName;
8909 }
8910#ifdef VBOX_WITH_STATISTICS
8911 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8912 return "iem.s.acThreadedFuncStats[iFn]";
8913#endif
8914 return NULL;
8915}
8916
8917
8918/**
8919 * Translates a label to a name.
8920 */
8921static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode /*= false*/)
8922{
8923 switch (enmLabel)
8924 {
8925#define STR_CASE_CMN(a_Label) case kIemNativeLabelType_ ## a_Label: return fCommonCode ? "Chunk_" #a_Label : #a_Label;
8926 STR_CASE_CMN(Invalid);
8927 STR_CASE_CMN(RaiseDe);
8928 STR_CASE_CMN(RaiseUd);
8929 STR_CASE_CMN(RaiseSseRelated);
8930 STR_CASE_CMN(RaiseAvxRelated);
8931 STR_CASE_CMN(RaiseSseAvxFpRelated);
8932 STR_CASE_CMN(RaiseNm);
8933 STR_CASE_CMN(RaiseGp0);
8934 STR_CASE_CMN(RaiseMf);
8935 STR_CASE_CMN(RaiseXf);
8936 STR_CASE_CMN(ObsoleteTb);
8937 STR_CASE_CMN(NeedCsLimChecking);
8938 STR_CASE_CMN(CheckBranchMiss);
8939 STR_CASE_CMN(Return);
8940 STR_CASE_CMN(ReturnBreak);
8941 STR_CASE_CMN(ReturnBreakFF);
8942 STR_CASE_CMN(ReturnWithFlags);
8943 STR_CASE_CMN(ReturnBreakViaLookup);
8944 STR_CASE_CMN(ReturnBreakViaLookupWithIrq);
8945 STR_CASE_CMN(ReturnBreakViaLookupWithTlb);
8946 STR_CASE_CMN(ReturnBreakViaLookupWithTlbAndIrq);
8947 STR_CASE_CMN(NonZeroRetOrPassUp);
8948#undef STR_CASE_CMN
8949#define STR_CASE_LBL(a_Label) case kIemNativeLabelType_ ## a_Label: return #a_Label;
8950 STR_CASE_LBL(If);
8951 STR_CASE_LBL(Else);
8952 STR_CASE_LBL(Endif);
8953 STR_CASE_LBL(CheckIrq);
8954 STR_CASE_LBL(TlbLookup);
8955 STR_CASE_LBL(TlbMiss);
8956 STR_CASE_LBL(TlbDone);
8957 case kIemNativeLabelType_End: break;
8958 }
8959 return NULL;
8960}
8961
8962
8963/** Info for the symbol resolver used when disassembling. */
8964typedef struct IEMNATIVDISASMSYMCTX
8965{
8966 PVMCPU pVCpu;
8967# ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8968 PCIEMNATIVEPERCHUNKCTX pCtx;
8969# endif
8970} IEMNATIVDISASMSYMCTX;
8971typedef IEMNATIVDISASMSYMCTX *PIEMNATIVDISASMSYMCTX;
8972
8973
8974/**
8975 * Resolve address to symbol, if we can.
8976 */
8977static const char *iemNativeDisasmGetSymbol(PIEMNATIVDISASMSYMCTX pSymCtx, uintptr_t uAddress)
8978{
8979#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8980 PCIEMNATIVEPERCHUNKCTX pChunkCtx = pSymCtx->pCtx;
8981 if (pChunkCtx)
8982 for (uint32_t i = 1; i < RT_ELEMENTS(pChunkCtx->apExitLabels); i++)
8983 if ((PIEMNATIVEINSTR)uAddress == pChunkCtx->apExitLabels[i])
8984 return iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true /*fCommonCode*/);
8985#endif
8986 return NULL;
8987}
8988
8989#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8990
8991/**
8992 * @callback_method_impl{FNDISGETSYMBOL}
8993 */
8994static DECLCALLBACK(int) iemNativeDisasmGetSymbolCb(PCDISSTATE pDis, uint32_t u32Sel, RTUINTPTR uAddress,
8995 char *pszBuf, size_t cchBuf, RTINTPTR *poff, void *pvUser)
8996{
8997 const char * const pszSym = iemNativeDisasmGetSymbol((PIEMNATIVDISASMSYMCTX)pvUser, uAddress);
8998 if (pszSym)
8999 {
9000 *poff = 0;
9001 return RTStrCopy(pszBuf, cchBuf, pszSym);
9002 }
9003 RT_NOREF(pDis, u32Sel);
9004 return VERR_SYMBOL_NOT_FOUND;
9005}
9006
9007#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9008
9009/**
9010 * Annotates an instruction decoded by the capstone disassembler.
9011 */
9012static const char *
9013iemNativeDisasmAnnotateCapstone(PIEMNATIVDISASMSYMCTX pSymCtx, cs_insn const *pInstr, char *pszBuf, size_t cchBuf)
9014{
9015# if defined(RT_ARCH_ARM64)
9016 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9017 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9018 {
9019 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9020 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9021 char const *psz = strchr(pInstr->op_str, '[');
9022 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9023 {
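            /* x28 holds pVCpu, so its displacement is already VMCPU-relative; for x27
               (pCpumCtx) add the CPUMCTX offset so the name lookup below still gets a
               VMCPU-relative offset. */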
9024 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9025 int32_t off = -1;
9026 psz += 4;
9027 if (*psz == ']')
9028 off = 0;
9029 else if (*psz == ',')
9030 {
9031 psz = RTStrStripL(psz + 1);
9032 if (*psz == '#')
9033 off = RTStrToInt32(&psz[1]);
9034 /** @todo deal with index registers and LSL as well... */
9035 }
9036 if (off >= 0)
9037 return iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9038 }
9039 }
9040 else if (pInstr->id == ARM64_INS_B || pInstr->id == ARM64_INS_BL)
9041 {
9042 const char *pszAddr = strchr(pInstr->op_str, '#');
9043 if (pszAddr)
9044 {
9045 uint64_t uAddr = RTStrToUInt64(pszAddr + 1);
9046 if (uAddr != 0)
9047 return iemNativeDisasmGetSymbol(pSymCtx, uAddr);
9048 }
9049 }
9050# endif
9051 RT_NOREF(pszBuf, cchBuf);
9052 return NULL;
9053}
9054#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9055
9056
9057DECLHIDDEN(void) iemNativeDisassembleTb(PVMCPU pVCpu, PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9058{
9059 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9060#if defined(RT_ARCH_AMD64)
9061 static const char * const a_apszMarkers[] =
9062 {
9063 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9064 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9065 };
9066#endif
9067
9068 char szDisBuf[512];
9069 DISSTATE Dis;
9070 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9071 uint32_t const cNative = pTb->Native.cInstructions;
9072 uint32_t offNative = 0;
9073#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9074 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9075#endif
9076 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9077 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9078 : DISCPUMODE_64BIT;
9079#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9080 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, iemExecMemGetTbChunkCtx(pVCpu, pTb) };
9081#else
9082 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu };
9083#endif
9084#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9085 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9086#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9087 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9088#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9089# error "Port me"
9090#else
9091 csh hDisasm = ~(size_t)0;
9092# if defined(RT_ARCH_AMD64)
9093 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9094# elif defined(RT_ARCH_ARM64)
9095 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9096# else
9097# error "Port me"
9098# endif
9099 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9100
9101 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9102 //Assert(rcCs == CS_ERR_OK);
9103#endif
9104
9105 /*
9106 * Print TB info.
9107 */
9108 pHlp->pfnPrintf(pHlp,
9109 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
9110 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9111 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9112 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9113#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9114 if (pDbgInfo && pDbgInfo->cEntries > 1)
9115 {
9116 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9117
9118 /*
9119 * This disassembly is driven by the debug info which follows the native
9120 * code and indicates when the next guest instruction starts,
9121 * where labels are and such things.
9122 */
9123 uint32_t idxThreadedCall = 0;
9124 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9125 uint8_t idxRange = UINT8_MAX;
9126 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9127 uint32_t offRange = 0;
9128 uint32_t offOpcodes = 0;
9129 uint32_t const cbOpcodes = pTb->cbOpcodes;
9130 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9131 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9132 uint32_t iDbgEntry = 1;
9133 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9134
9135 while (offNative < cNative)
9136 {
9137 /* If we're at or have passed the point where the next chunk of debug
9138 info starts, process it. */
9139 if (offDbgNativeNext <= offNative)
9140 {
9141 offDbgNativeNext = UINT32_MAX;
9142 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9143 {
9144 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9145 {
9146 case kIemTbDbgEntryType_GuestInstruction:
9147 {
9148 /* Did the exec flag change? */
9149 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9150 {
9151 pHlp->pfnPrintf(pHlp,
9152 " fExec change %#08x -> %#08x %s\n",
9153 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9154 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9155 szDisBuf, sizeof(szDisBuf)));
9156 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9157 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9158 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9159 : DISCPUMODE_64BIT;
9160 }
9161
9162 /* New opcode range? We need to fend off a spurious debug info entry here for cases
9163 where the compilation was aborted before the opcode was recorded and the actual
9164 instruction was translated to a threaded call. This may happen when we run out
9165 of ranges, or when some complicated interrupts/FFs are found to be pending or
9166 similar. So, we just deal with it here rather than in the compiler code as it
9167 is a lot simpler to do here. */
9168 if ( idxRange == UINT8_MAX
9169 || idxRange >= cRanges
9170 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9171 {
9172 idxRange += 1;
9173 if (idxRange < cRanges)
9174 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9175 else
9176 continue;
9177 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9178 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9179 + (pTb->aRanges[idxRange].idxPhysPage == 0
9180 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9181 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9182 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9183 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9184 pTb->aRanges[idxRange].idxPhysPage);
9185 GCPhysPc += offRange;
9186 }
9187
9188 /* Disassemble the instruction. */
9189 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9190 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9191 uint32_t cbInstr = 1;
9192 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9193 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9194 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9195 if (RT_SUCCESS(rc))
9196 {
9197 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9198 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9199 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9200 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9201
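                            /* Pad the guest disassembly to column 55 and append a marker so the
                               guest instructions stand out from the native code in the listing. */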
9202 static unsigned const s_offMarker = 55;
9203 static char const s_szMarker[] = " ; <--- guest";
9204 if (cch < s_offMarker)
9205 {
9206 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9207 cch = s_offMarker;
9208 }
9209 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9210 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9211
9212 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9213 }
9214 else
9215 {
9216 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9217 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9218 cbInstr = 1;
9219 }
9220 GCPhysPc += cbInstr;
9221 offOpcodes += cbInstr;
9222 offRange += cbInstr;
9223 continue;
9224 }
9225
9226 case kIemTbDbgEntryType_ThreadedCall:
9227 pHlp->pfnPrintf(pHlp,
9228 " Call #%u to %s (%u args) - %s\n",
9229 idxThreadedCall,
9230 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9231 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9232 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9233 idxThreadedCall++;
9234 continue;
9235
9236 case kIemTbDbgEntryType_GuestRegShadowing:
9237 {
9238 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9239 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9240 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9241 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9242 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9243 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9244 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9245 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9246 else
9247 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9248 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9249 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9250 continue;
9251 }
9252
9253#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9254 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9255 {
9256 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9257 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9258 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9259 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9260 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9261 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9262 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9263 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9264 else
9265 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9266 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9267 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9268 continue;
9269 }
9270#endif
9271
9272 case kIemTbDbgEntryType_Label:
9273 {
9274 const char *pszName = iemNativeGetLabelName((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel);
9275 if (pDbgInfo->aEntries[iDbgEntry].Label.enmLabel >= kIemNativeLabelType_FirstWithMultipleInstances)
9276 {
9277 const char *pszComment = pDbgInfo->aEntries[iDbgEntry].Label.enmLabel == kIemNativeLabelType_Else
9278 ? " ; regs state restored pre-if-block" : "";
9279 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9280 }
9281 else
9282 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9283 continue;
9284 }
9285
9286 case kIemTbDbgEntryType_NativeOffset:
9287 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9288 Assert(offDbgNativeNext >= offNative);
9289 break;
9290
9291#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9292 case kIemTbDbgEntryType_DelayedPcUpdate:
9293 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9294 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9295 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9296 continue;
9297#endif
9298
9299#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9300 case kIemTbDbgEntryType_GuestRegDirty:
9301 {
9302 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9303 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9304 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9305 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9306 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9307 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9308 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9309 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9310 pszGstReg, pszHstReg);
9311 continue;
9312 }
9313
9314 case kIemTbDbgEntryType_GuestRegWriteback:
9315 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX32)\n",
9316 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9317 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9318 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9319 continue;
9320#endif
9321
9322 default:
9323 AssertFailed();
9324 }
9325 iDbgEntry++;
9326 break;
9327 }
9328 }
9329
9330 /*
9331 * Disassemble the next native instruction.
9332 */
9333 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9334# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9335 uint32_t cbInstr = sizeof(paNative[0]);
9336 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9337 if (RT_SUCCESS(rc))
9338 {
9339# if defined(RT_ARCH_AMD64)
9340 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9341 {
9342 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9343 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9344 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9345 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9346 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9347 uInfo & 0x8000 ? "recompiled" : "todo");
9348 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9349 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9350 else
9351 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9352 }
9353 else
9354# endif
9355 {
9356 const char *pszAnnotation = NULL;
9357# ifdef RT_ARCH_AMD64
9358 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9359 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9360 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9361 iemNativeDisasmGetSymbolCb, &SymCtx);
9362 PCDISOPPARAM pMemOp;
9363 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
9364 pMemOp = &Dis.Param1;
9365 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
9366 pMemOp = &Dis.Param2;
9367 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
9368 pMemOp = &Dis.Param3;
9369 else
9370 pMemOp = NULL;
9371 if ( pMemOp
9372 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9373 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9374 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9375 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9376
9377#elif defined(RT_ARCH_ARM64)
9378 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9379 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9380 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9381# else
9382# error "Port me"
9383# endif
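                    /* Append any annotation as a trailing ';' comment at column 55, provided
                       the whole thing still fits in the buffer. */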
9384 if (pszAnnotation)
9385 {
9386 static unsigned const s_offAnnotation = 55;
9387 size_t const cchAnnotation = strlen(pszAnnotation);
9388 size_t cchDis = strlen(szDisBuf);
9389 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9390 {
9391 if (cchDis < s_offAnnotation)
9392 {
9393 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9394 cchDis = s_offAnnotation;
9395 }
9396 szDisBuf[cchDis++] = ' ';
9397 szDisBuf[cchDis++] = ';';
9398 szDisBuf[cchDis++] = ' ';
9399 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9400 }
9401 }
9402 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9403 }
9404 }
9405 else
9406 {
9407# if defined(RT_ARCH_AMD64)
9408 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9409 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9410# elif defined(RT_ARCH_ARM64)
9411 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9412# else
9413# error "Port me"
9414# endif
9415 cbInstr = sizeof(paNative[0]);
9416 }
9417 offNative += cbInstr / sizeof(paNative[0]);
9418
9419# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9420 cs_insn *pInstr;
9421 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9422 (uintptr_t)pNativeCur, 1, &pInstr);
9423 if (cInstrs > 0)
9424 {
9425 Assert(cInstrs == 1);
9426 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9427 size_t const cchOp = strlen(pInstr->op_str);
9428# if defined(RT_ARCH_AMD64)
9429 if (pszAnnotation)
9430 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9431 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9432 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9433 else
9434 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9435 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9436
9437# else
9438 if (pszAnnotation)
9439 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9440 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9441 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9442 else
9443 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9444 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9445# endif
9446 offNative += pInstr->size / sizeof(*pNativeCur);
9447 cs_free(pInstr, cInstrs);
9448 }
9449 else
9450 {
9451# if defined(RT_ARCH_AMD64)
9452 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9453 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9454# else
9455 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9456# endif
9457 offNative++;
9458 }
9459# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9460 }
9461 }
9462 else
9463#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9464 {
9465 /*
9466 * No debug info, just disassemble the x86 code and then the native code.
9467 *
9468 * First the guest code:
9469 */
9470 for (unsigned i = 0; i < pTb->cRanges; i++)
9471 {
9472 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9473 + (pTb->aRanges[i].idxPhysPage == 0
9474 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9475 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9476 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9477 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9478 unsigned off = pTb->aRanges[i].offOpcodes;
9479 /** @todo this ain't working when crossing pages! */
9480 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9481 while (off < cbOpcodes)
9482 {
9483 uint32_t cbInstr = 1;
9484 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9485 &pTb->pabOpcodes[off], cbOpcodes - off,
9486 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9487 if (RT_SUCCESS(rc))
9488 {
9489 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9490 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9491 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9492 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9493 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9494 GCPhysPc += cbInstr;
9495 off += cbInstr;
9496 }
9497 else
9498 {
9499 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9500 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9501 break;
9502 }
9503 }
9504 }
9505
9506 /*
9507 * Then the native code:
9508 */
9509 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9510 while (offNative < cNative)
9511 {
9512 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9513# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9514 uint32_t cbInstr = sizeof(paNative[0]);
9515 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9516 if (RT_SUCCESS(rc))
9517 {
9518# if defined(RT_ARCH_AMD64)
9519 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9520 {
9521 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9522 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9523 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9524 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9525 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9526 uInfo & 0x8000 ? "recompiled" : "todo");
9527 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9528 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9529 else
9530 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9531 }
9532 else
9533# endif
9534 {
9535# ifdef RT_ARCH_AMD64
9536 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9537 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9538 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9539 iemNativeDisasmGetSymbolCb, &SymCtx);
9540# elif defined(RT_ARCH_ARM64)
9541 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9542 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9543 iemNativeDisasmGetSymbolCb, &SymCtx);
9544# else
9545# error "Port me"
9546# endif
9547 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9548 }
9549 }
9550 else
9551 {
9552# if defined(RT_ARCH_AMD64)
9553 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9554 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9555# else
9556 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9557# endif
9558 cbInstr = sizeof(paNative[0]);
9559 }
9560 offNative += cbInstr / sizeof(paNative[0]);
9561
9562# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9563 cs_insn *pInstr;
9564 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9565 (uintptr_t)pNativeCur, 1, &pInstr);
9566 if (cInstrs > 0)
9567 {
9568 Assert(cInstrs == 1);
9569 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9570 size_t const cchOp = strlen(pInstr->op_str);
9571# if defined(RT_ARCH_AMD64)
9572 if (pszAnnotation)
9573 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9574 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9575 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9576 else
9577 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9578 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9579
9580# else
9581 if (pszAnnotation)
9582 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9583 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9584 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9585 else
9586 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9587 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9588# endif
9589 offNative += pInstr->size / sizeof(*pNativeCur);
9590 cs_free(pInstr, cInstrs);
9591 }
9592 else
9593 {
9594# if defined(RT_ARCH_AMD64)
9595 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9596                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9597# else
9598 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9599# endif
9600 offNative++;
9601 }
9602# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9603 }
9604 }
9605
9606#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9607 /* Cleanup. */
9608 cs_close(&hDisasm);
9609#endif
9610}
9611
9612
9613#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9614
9615/** Emit alignment padding between labels / functions. */
9616DECL_INLINE_THROW(uint32_t)
9617iemNativeRecompileEmitAlignmentPadding(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fAlignMask)
9618{
9619 if (off & fAlignMask)
9620 {
9621 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, fAlignMask + 1);
9622 while (off & fAlignMask)
9623# if defined(RT_ARCH_AMD64)
9624 pCodeBuf[off++] = 0xcc;
9625# elif defined(RT_ARCH_ARM64)
9626 pCodeBuf[off++] = Armv8A64MkInstrBrk(0xcccc);
9627# else
9628# error "port me"
9629# endif
9630 }
9631 return off;
9632}
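/* For instance, with the AMD64 mask of 15 an off of 0x23 is padded with 0xcc (int3)
   filler up to 0x30; the ARM64 variant emits BRK #0xcccc words instead. The filler
   should never be reached, it merely separates the per-chunk routines. */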
9633
9634
9635/**
9636 * Called when a new chunk is allocated to emit the common per-chunk code.
9637 *
9638 * Allocates a per-chunk context directly from the chunk itself and places the
9639 * common code there.
9640 *
9641 * @returns Pointer to the chunk context start.
9642 * @param pVCpu The cross context virtual CPU structure of the calling
9643 * thread.
9644 * @param idxChunk The index of the chunk being added and requiring a
9645 * common code context.
9646 */
9647DECLHIDDEN(PCIEMNATIVEPERCHUNKCTX) iemNativeRecompileAttachExecMemChunkCtx(PVMCPU pVCpu, uint32_t idxChunk)
9648{
9649 /*
9650 * Allocate a new recompiler state (since we're likely to be called while
9651 * the default one is fully loaded already with a recompiled TB).
9652 *
9653 * This is a bit of overkill, but this isn't a frequently used code path.
9654 */
9655 PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, NULL);
9656 AssertReturn(pReNative, NULL);
9657
9658# if defined(RT_ARCH_AMD64)
9659 uint32_t const fAlignMask = 15;
9660# elif defined(RT_ARCH_ARM64)
9661 uint32_t const fAlignMask = 31 / 4;
9662# else
9663# error "port me"
9664# endif
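    /* Note: off is counted in IEMNATIVEINSTR units (bytes on AMD64, 32-bit words on
       ARM64), so the masks above align each routine to 16 resp. 32 bytes. */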
9665 uint32_t aoffLabels[kIemNativeLabelType_LastTbExit + 1] = {0};
9666 int rc = VINF_SUCCESS;
9667 uint32_t off = 0;
9668
9669 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9670 {
9671 /*
9672 * Emit the epilog code.
9673 */
9674 aoffLabels[kIemNativeLabelType_Return] = off;
9675 off = iemNativeEmitCoreEpilog(pReNative, off);
9676
9677 /*
9678         * Generate special jump labels. All of these get a copy of the epilog code.
9679 */
9680 static struct
9681 {
9682 IEMNATIVELABELTYPE enmExitReason;
9683 uint32_t (*pfnEmitCore)(PIEMRECOMPILERSTATE pReNative, uint32_t off);
9684 } const s_aSpecialWithEpilogs[] =
9685 {
9686 { kIemNativeLabelType_NonZeroRetOrPassUp, iemNativeEmitCoreRcFiddling },
9687 { kIemNativeLabelType_ReturnBreak, iemNativeEmitCoreReturnBreak },
9688 { kIemNativeLabelType_ReturnBreakFF, iemNativeEmitCoreReturnBreakFF },
9689 { kIemNativeLabelType_ReturnWithFlags, iemNativeEmitCoreReturnWithFlags },
9690 };
9691 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSpecialWithEpilogs); i++)
9692 {
9693 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9694 Assert(aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] == 0);
9695 aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] = off;
9696 off = s_aSpecialWithEpilogs[i].pfnEmitCore(pReNative, off);
9697 off = iemNativeEmitCoreEpilog(pReNative, off);
9698 }
9699
9700 /*
9701 * Do what iemNativeEmitReturnBreakViaLookup does.
9702 */
9703 static struct
9704 {
9705 IEMNATIVELABELTYPE enmExitReason;
9706 uintptr_t pfnHelper;
9707 } const s_aViaLookup[] =
9708 {
9709 { kIemNativeLabelType_ReturnBreakViaLookup,
9710 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/> },
9711 { kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
9712 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/> },
9713 { kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
9714 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/> },
9715 { kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
9716 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/> },
9717 };
9718 uint32_t const offReturnBreak = aoffLabels[kIemNativeLabelType_ReturnBreak]; Assert(offReturnBreak != 0);
9719 for (uint32_t i = 0; i < RT_ELEMENTS(s_aViaLookup); i++)
9720 {
9721 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9722 Assert(aoffLabels[s_aViaLookup[i].enmExitReason] == 0);
9723 aoffLabels[s_aViaLookup[i].enmExitReason] = off;
9724 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, s_aViaLookup[i].pfnHelper);
9725 }
9726
9727 /*
9728         * Generate simple TB tail labels that just call a helper with a pVCpu
9729         * argument and either return or longjmp/throw a non-zero status.
9730 */
9731 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9732 static struct
9733 {
9734 IEMNATIVELABELTYPE enmExitReason;
9735 bool fWithEpilog;
9736 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9737 } const s_aSimpleTailLabels[] =
9738 {
9739 { kIemNativeLabelType_RaiseDe, false, iemNativeHlpExecRaiseDe },
9740 { kIemNativeLabelType_RaiseUd, false, iemNativeHlpExecRaiseUd },
9741 { kIemNativeLabelType_RaiseSseRelated, false, iemNativeHlpExecRaiseSseRelated },
9742 { kIemNativeLabelType_RaiseAvxRelated, false, iemNativeHlpExecRaiseAvxRelated },
9743 { kIemNativeLabelType_RaiseSseAvxFpRelated, false, iemNativeHlpExecRaiseSseAvxFpRelated },
9744 { kIemNativeLabelType_RaiseNm, false, iemNativeHlpExecRaiseNm },
9745 { kIemNativeLabelType_RaiseGp0, false, iemNativeHlpExecRaiseGp0 },
9746 { kIemNativeLabelType_RaiseMf, false, iemNativeHlpExecRaiseMf },
9747 { kIemNativeLabelType_RaiseXf, false, iemNativeHlpExecRaiseXf },
9748 { kIemNativeLabelType_ObsoleteTb, true, iemNativeHlpObsoleteTb },
9749 { kIemNativeLabelType_NeedCsLimChecking, true, iemNativeHlpNeedCsLimChecking },
9750 { kIemNativeLabelType_CheckBranchMiss, true, iemNativeHlpCheckBranchMiss },
9751 };
9752 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSimpleTailLabels); i++)
9753 {
9754 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9755 Assert(!aoffLabels[s_aSimpleTailLabels[i].enmExitReason]);
9756 aoffLabels[s_aSimpleTailLabels[i].enmExitReason] = off;
9757
9758 /* int pfnCallback(PVMCPUCC pVCpu) */
9759 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9760 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)s_aSimpleTailLabels[i].pfnCallback);
9761
9762 /* jump back to the return sequence / generate a return sequence. */
9763 if (!s_aSimpleTailLabels[i].fWithEpilog)
9764 off = iemNativeEmitJmpToFixed(pReNative, off, aoffLabels[kIemNativeLabelType_Return]);
9765 else
9766 off = iemNativeEmitCoreEpilog(pReNative, off);
9767 }
9768
9769
9770# ifdef VBOX_STRICT
9771        /* Make sure we've generated code for all labels. */
9772 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(aoffLabels); i++)
9773 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_Return);
9774# endif
9775 }
9776 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9777 {
9778 Log(("iemNativeRecompileAttachExecMemChunkCtx: Caught %Rrc while recompiling!\n", rc));
9779 iemNativeTerm(pReNative);
9780 return NULL;
9781 }
9782 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9783
9784 /*
9785 * Allocate memory for the context (first) and the common code (last).
9786 */
9787 PIEMNATIVEPERCHUNKCTX pCtx;
9788 uint32_t const cbCtx = RT_ALIGN_32(sizeof(*pCtx), 64);
9789 uint32_t const cbCode = off * sizeof(IEMNATIVEINSTR);
9790 PIEMNATIVEINSTR paFinalCommonCodeRx = NULL;
9791 pCtx = (PIEMNATIVEPERCHUNKCTX)iemExecMemAllocatorAllocFromChunk(pVCpu, idxChunk, cbCtx + cbCode, &paFinalCommonCodeRx);
9792 AssertLogRelMsgReturn(pCtx, ("cbCtx=%#x cbCode=%#x idxChunk=%#x\n", cbCtx, cbCode, idxChunk), NULL);
9793
9794 /*
9795 * Copy over the generated code.
9796 * There should be no fixups or labels defined here.
9797 */
9798 paFinalCommonCodeRx = (PIEMNATIVEINSTR)((uintptr_t)paFinalCommonCodeRx + cbCtx);
9799 memcpy((PIEMNATIVEINSTR)((uintptr_t)pCtx + cbCtx), pReNative->pInstrBuf, cbCode);
9800
9801 Assert(pReNative->cFixups == 0);
9802 Assert(pReNative->cLabels == 0);
9803
9804 /*
9805 * Initialize the context.
9806 */
9807 AssertCompile(kIemNativeLabelType_Invalid == 0);
9808 AssertCompile(RT_ELEMENTS(pCtx->apExitLabels) == RT_ELEMENTS(aoffLabels));
9809 pCtx->apExitLabels[kIemNativeLabelType_Invalid] = 0;
9810 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(pCtx->apExitLabels); i++)
9811 {
9812 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_Return);
9813 pCtx->apExitLabels[i] = &paFinalCommonCodeRx[aoffLabels[i]];
9814 Log10((" apExitLabels[%u]=%p %s\n", i, pCtx->apExitLabels[i], iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true)));
9815 }
9816
9817 iemExecMemAllocatorReadyForUse(pVCpu, pCtx, cbCtx + cbCode);
9818
9819 iemNativeTerm(pReNative);
9820 return pCtx;
9821}
9822
9823#endif /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
9824
9825/**
9826 * Recompiles the given threaded TB into a native one.
9827 *
9828 * In case of failure the translation block will be returned as-is.
9829 *
9830 * @returns pTb.
9831 * @param pVCpu The cross context virtual CPU structure of the calling
9832 * thread.
9833 * @param   pTb         The threaded translation block to recompile to native.
9834 */
9835DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9836{
9837#if 0 /* For profiling the native recompiler code. */
9838l_profile_again:
9839#endif
9840 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9841
9842 /*
9843     * The first time thru, we allocate the recompiler state and save it;
9844     * all the other times we'll just reuse the saved one after a quick reset.
9845 */
9846 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9847 if (RT_LIKELY(pReNative))
9848 iemNativeReInit(pReNative, pTb);
9849 else
9850 {
9851 pReNative = iemNativeInit(pVCpu, pTb);
9852 AssertReturn(pReNative, pTb);
9853 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative; /* save it */
9854 }
9855
9856#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9857 /*
9858 * First do liveness analysis. This is done backwards.
9859 */
9860 {
9861 uint32_t idxCall = pTb->Thrd.cCalls;
9862 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9863 { /* likely */ }
9864 else
9865 {
9866 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9867 while (idxCall > cAlloc)
9868 cAlloc *= 2;
9869 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9870 AssertReturn(pvNew, pTb);
9871 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9872 pReNative->cLivenessEntriesAlloc = cAlloc;
9873 }
9874 AssertReturn(idxCall > 0, pTb);
9875 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9876
9877 /* The initial (final) entry. */
9878 idxCall--;
9879 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9880
9881 /* Loop backwards thru the calls and fill in the other entries. */
9882 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9883 while (idxCall > 0)
9884 {
9885 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9886 if (pfnLiveness)
9887 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9888 else
9889 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9890 pCallEntry--;
9891 idxCall--;
9892 }
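        /* E.g. for a TB with three calls: entry[2] starts out all-unused, call #2's
           liveness function derives entry[1] from it, and call #1's derives entry[0].
           The emitter loop below records idxCurCall so the per-call entries can be
           consulted while generating code (see the LogIs2Enabled dump further down). */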
9893
9894# ifdef VBOX_WITH_STATISTICS
9895        /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9896           to 'clobbered' rather than 'input'. */
9897 /** @todo */
9898# endif
9899 }
9900#endif
9901
9902 /*
9903 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9904 * for aborting if an error happens.
9905 */
9906 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9907#ifdef LOG_ENABLED
9908 uint32_t const cCallsOrg = cCallsLeft;
9909#endif
9910 uint32_t off = 0;
9911 int rc = VINF_SUCCESS;
9912 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9913 {
9914#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
9915 /*
9916 * Emit prolog code (fixed).
9917 */
9918 off = iemNativeEmitProlog(pReNative, off);
9919#endif
9920
9921 /*
9922 * Convert the calls to native code.
9923 */
9924#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9925 int32_t iGstInstr = -1;
9926#endif
9927#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9928 uint32_t cThreadedCalls = 0;
9929 uint32_t cRecompiledCalls = 0;
9930#endif
9931#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9932 uint32_t idxCurCall = 0;
9933#endif
9934 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9935 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9936 while (cCallsLeft-- > 0)
9937 {
9938 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9939#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9940 pReNative->idxCurCall = idxCurCall;
9941#endif
9942
9943 /*
9944 * Debug info, assembly markup and statistics.
9945 */
9946#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9947 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9948 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9949#endif
9950#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9951 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9952 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9953 {
9954 if (iGstInstr < (int32_t)pTb->cInstructions)
9955 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9956 else
9957 Assert(iGstInstr == pTb->cInstructions);
9958 iGstInstr = pCallEntry->idxInstr;
9959 }
9960 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9961#endif
9962#if defined(VBOX_STRICT)
9963 off = iemNativeEmitMarker(pReNative, off,
9964 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9965#endif
9966#if defined(VBOX_STRICT)
9967 iemNativeRegAssertSanity(pReNative);
9968#endif
9969#ifdef VBOX_WITH_STATISTICS
9970 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9971#endif
9972
9973#if 0
9974 if ( pTb->GCPhysPc == 0x00000000000c1240
9975 && idxCurCall == 67)
9976 off = iemNativeEmitBrk(pReNative, off, 0xf000);
9977#endif
9978
9979 /*
9980 * Actual work.
9981 */
9982 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9983 pfnRecom ? "(recompiled)" : "(todo)"));
9984 if (pfnRecom) /** @todo stats on this. */
9985 {
9986 off = pfnRecom(pReNative, off, pCallEntry);
9987 STAM_REL_STATS({cRecompiledCalls++;});
9988 }
9989 else
9990 {
9991 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9992 STAM_REL_STATS({cThreadedCalls++;});
9993 }
9994 Assert(off <= pReNative->cInstrBufAlloc);
9995 Assert(pReNative->cCondDepth == 0);
9996
9997#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9998 if (LogIs2Enabled())
9999 {
10000 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10001# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10002 static const char s_achState[] = "CUXI";
10003# else
10004 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
10005# endif
10006
10007 char szGpr[17];
10008 for (unsigned i = 0; i < 16; i++)
10009 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10010 szGpr[16] = '\0';
10011
10012 char szSegBase[X86_SREG_COUNT + 1];
10013 char szSegLimit[X86_SREG_COUNT + 1];
10014 char szSegAttrib[X86_SREG_COUNT + 1];
10015 char szSegSel[X86_SREG_COUNT + 1];
10016 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10017 {
10018 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10019 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10020 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10021 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10022 }
10023 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10024 = szSegSel[X86_SREG_COUNT] = '\0';
10025
10026 char szEFlags[8];
10027 for (unsigned i = 0; i < 7; i++)
10028 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10029 szEFlags[7] = '\0';
10030
10031 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10032 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10033 }
10034#endif
10035
10036 /*
10037 * Advance.
10038 */
10039 pCallEntry++;
10040#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
10041 idxCurCall++;
10042#endif
10043 }
10044
10045 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10046 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10047 if (!cThreadedCalls)
10048 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10049
10050#ifdef VBOX_WITH_STATISTICS
10051 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
10052#endif
10053
10054 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
10055 off = iemNativeRegFlushPendingWrites(pReNative, off);
10056
10057 /*
10058 * Successful return, so clear the return register (eax, w0).
10059 */
10060 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
10061
10062#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10063 /*
10064 * Emit the epilog code.
10065 */
10066 uint32_t idxReturnLabel;
10067 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
10068#else
10069 /*
10070 * Jump to the common per-chunk epilog code.
10071 */
10072 //off = iemNativeEmitBrk(pReNative, off, 0x1227);
10073 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_Return);
10074#endif
10075
10076#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10077 /*
10078 * Generate special jump labels.
10079 */
10080 off = iemNativeEmitRcFiddling(pReNative, off, idxReturnLabel);
10081
10082 bool const fReturnBreakViaLookup = RT_BOOL( pReNative->bmLabelTypes
10083 & ( RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookup)
10084 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithIrq)
10085 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlb)
10086 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq)));
10087 if (fReturnBreakViaLookup)
10088 {
10089 uint32_t const idxReturnBreakLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
10090 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10091 off = iemNativeEmitReturnBreakViaLookup(pReNative, off, idxReturnBreakLabel);
10092 }
10093 else if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
10094 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10095
10096 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreakFF))
10097 off = iemNativeEmitReturnBreakFF(pReNative, off, idxReturnLabel);
10098
10099 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
10100 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
10101
10102 /*
10103         * Generate simple TB tail labels that just call a helper with a pVCpu
10104         * argument and either return or longjmp/throw a non-zero status.
10105 *
10106 * The array entries must be ordered by enmLabel value so we can index
10107 * using fTailLabels bit numbers.
10108 */
10109 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
10110 static struct
10111 {
10112 IEMNATIVELABELTYPE enmLabel;
10113 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
10114 } const g_aSimpleTailLabels[] =
10115 {
10116 { kIemNativeLabelType_Invalid, NULL },
10117 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
10118 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
10119 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
10120 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
10121 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
10122 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
10123 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
10124 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
10125 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
10126 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
10127 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
10128 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
10129 };
10130
10131 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
10132 AssertCompile(kIemNativeLabelType_Invalid == 0);
10133 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
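        /* The '- 2U' keeps bits 1 thru kIemNativeLabelType_LastSimple and drops bit 0,
           i.e. every simple tail label except kIemNativeLabelType_Invalid. */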
10134 if (fTailLabels)
10135 {
10136 do
10137 {
10138 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10139 fTailLabels &= ~RT_BIT_64(enmLabel);
10140 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
10141
10142 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10143 Assert(idxLabel != UINT32_MAX);
10144 if (idxLabel != UINT32_MAX)
10145 {
10146 iemNativeLabelDefine(pReNative, idxLabel, off);
10147
10148 /* int pfnCallback(PVMCPUCC pVCpu) */
10149 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10150 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
10151
10152 /* jump back to the return sequence. */
10153 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
10154 }
10155
10156 } while (fTailLabels);
10157 }
10158
10159#else /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
10160 /*
10161 * Generate tail labels with jumps to the common per-chunk code.
10162 */
10163# ifndef RT_ARCH_AMD64
10164 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_Return) | RT_BIT_64(kIemNativeLabelType_Invalid))));
10165 AssertCompile(kIemNativeLabelType_Invalid == 0);
10166 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1U) - 2U);
10167 if (fTailLabels)
10168 {
10169 do
10170 {
10171 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10172 fTailLabels &= ~RT_BIT_64(enmLabel);
10173
10174 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10175 AssertContinue(idxLabel != UINT32_MAX);
10176 iemNativeLabelDefine(pReNative, idxLabel, off);
10177 off = iemNativeEmitTbExit(pReNative, off, enmLabel);
10178 } while (fTailLabels);
10179 }
10180# else
10181 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1) - 1U))); /* Should not be used! */
10182# endif
10183#endif /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
10184 }
10185 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10186 {
10187 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10188 return pTb;
10189 }
10190 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10191 Assert(off <= pReNative->cInstrBufAlloc);
10192
10193 /*
10194     * Make sure all labels have been defined.
10195 */
10196 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10197#ifdef VBOX_STRICT
10198 uint32_t const cLabels = pReNative->cLabels;
10199 for (uint32_t i = 0; i < cLabels; i++)
10200 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10201#endif
10202
10203#if 0 /* For profiling the native recompiler code. */
10204 if (pTb->Thrd.cCalls >= 136)
10205 {
10206 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10207 goto l_profile_again;
10208 }
10209#endif
10210
10211 /*
10212 * Allocate executable memory, copy over the code we've generated.
10213 */
10214 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10215 if (pTbAllocator->pDelayedFreeHead)
10216 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10217
10218 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
10219#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10220 PCIEMNATIVEPERCHUNKCTX pCtx = NULL;
10221 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10222 &paFinalInstrBufRx, &pCtx);
10223
10224#else
10225 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10226 &paFinalInstrBufRx, NULL);
10227#endif
10228 AssertReturn(paFinalInstrBuf, pTb);
10229 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
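    /* Note: paFinalInstrBuf is the writable mapping and paFinalInstrBufRx the executable
       mapping of that same allocation; the fixups below patch through the former while
       the TB itself and the exit-label displacements reference the latter. */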
10230
10231 /*
10232 * Apply fixups.
10233 */
10234 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10235 uint32_t const cFixups = pReNative->cFixups;
10236 for (uint32_t i = 0; i < cFixups; i++)
10237 {
10238 Assert(paFixups[i].off < off);
10239 Assert(paFixups[i].idxLabel < cLabels);
10240 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10241 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10242 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10243 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10244 switch (paFixups[i].enmType)
10245 {
10246#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10247 case kIemNativeFixupType_Rel32:
10248 Assert(paFixups[i].off + 4 <= off);
10249 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10250 continue;
10251
10252#elif defined(RT_ARCH_ARM64)
10253 case kIemNativeFixupType_RelImm26At0:
10254 {
10255 Assert(paFixups[i].off < off);
10256 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10257 Assert(offDisp >= -33554432 && offDisp < 33554432);
10258 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10259 continue;
10260 }
10261
10262 case kIemNativeFixupType_RelImm19At5:
10263 {
10264 Assert(paFixups[i].off < off);
10265 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10266 Assert(offDisp >= -262144 && offDisp < 262144);
10267 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10268 continue;
10269 }
10270
10271 case kIemNativeFixupType_RelImm14At5:
10272 {
10273 Assert(paFixups[i].off < off);
10274 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10275 Assert(offDisp >= -8192 && offDisp < 8192);
10276 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10277 continue;
10278 }
10279
10280#endif
10281 case kIemNativeFixupType_Invalid:
10282 case kIemNativeFixupType_End:
10283 break;
10284 }
10285 AssertFailed();
10286 }
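    /* Worked example for the AMD64 Rel32 case: a fixup at off 0x10 against a label at
       off 0x40 with an offAddend of -4 (the usual choice, making the value relative to
       the end of the 4-byte field) stores 0x40 - 0x10 - 4 = 0x2c. The ARM64 cases
       instead splice the instruction-unit displacement into the imm26/imm19/imm14 bits
       of the already emitted branch instruction. */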
10287
10288#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10289 /*
10290 * Apply TB exit fixups.
10291 */
10292 PIEMNATIVEEXITFIXUP const paTbExitFixups = pReNative->paTbExitFixups;
10293 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
10294 for (uint32_t i = 0; i < cTbExitFixups; i++)
10295 {
10296 Assert(paTbExitFixups[i].off < off);
10297 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(paTbExitFixups[i].enmExitReason));
10298 RTPTRUNION const Ptr = { &paFinalInstrBuf[paTbExitFixups[i].off] };
10299
10300# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10301 Assert(paTbExitFixups[i].off + 4 <= off);
10302 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off + 4];
10303 Assert(offDisp >= INT32_MIN && offDisp <= INT32_MAX);
10304 *Ptr.pi32 = (int32_t)offDisp;
10305
10306# elif defined(RT_ARCH_ARM64)
10307 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off];
10308 Assert(offDisp >= -33554432 && offDisp < 33554432);
10309 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10310
10311# else
10312# error "Port me!"
10313# endif
10314 }
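    /* Unlike the label fixups above, these displacements are computed against the final
       RX addresses, since the targets live in the per-chunk common code rather than in
       this TB's own instruction buffer. */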
10315#endif
10316
10317 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
10318 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10319
10320 /*
10321 * Convert the translation block.
10322 */
10323 RTMemFree(pTb->Thrd.paCalls);
10324 pTb->Native.paInstructions = paFinalInstrBufRx;
10325 pTb->Native.cInstructions = off;
10326 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10327#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10328 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so not return check. */
10329 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10330#endif
10331
10332 Assert(pTbAllocator->cThreadedTbs > 0);
10333 pTbAllocator->cThreadedTbs -= 1;
10334 pTbAllocator->cNativeTbs += 1;
10335 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10336
10337#ifdef LOG_ENABLED
10338 /*
10339 * Disassemble to the log if enabled.
10340 */
10341 if (LogIs3Enabled())
10342 {
10343 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10344 iemNativeDisassembleTb(pVCpu, pTb, DBGFR3InfoLogHlp());
10345# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10346 RTLogFlush(NULL);
10347# endif
10348 }
10349#endif
10350 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10351
10352 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10353 return pTb;
10354}
10355