VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 105261

Last change on this file since 105261 was 105261, checked in by vboxsync, 5 months ago

VMM/IEM: Share epilog and other tail code on a per-chunk basis (due to jump range). bugref:10677

1/* $Id: IEMAllN8veRecompiler.cpp 105261 2024-07-10 14:51:55Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include <VBox/vmm/tm.h>
55#include "IEMInternal.h"
56#include <VBox/vmm/vmcc.h>
57#include <VBox/log.h>
58#include <VBox/err.h>
59#include <VBox/dis.h>
60#include <VBox/param.h>
61#include <iprt/assert.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
71# include "/opt/local/include/capstone/capstone.h"
72#endif
73
74#include "IEMInline.h"
75#include "IEMThreadedFunctions.h"
76#include "IEMN8veRecompiler.h"
77#include "IEMN8veRecompilerEmit.h"
78#include "IEMN8veRecompilerTlbLookup.h"
79#include "IEMNativeFunctions.h"
80
81
82/*
83 * Narrow down configs here to avoid wasting time on unused configs.
84 * Note! Same checks in IEMAllThrdRecompiler.cpp.
85 */
86
87#ifndef IEM_WITH_CODE_TLB
88# error The code TLB must be enabled for the recompiler.
89#endif
90
91#ifndef IEM_WITH_DATA_TLB
92# error The data TLB must be enabled for the recompiler.
93#endif
94
95#ifndef IEM_WITH_SETJMP
96# error The setjmp approach must be enabled for the recompiler.
97#endif
98
99/** @todo eliminate this clang build hack. */
100#if RT_CLANG_PREREQ(4, 0)
101# pragma GCC diagnostic ignored "-Wunused-function"
102#endif
103
104
105/*********************************************************************************************************************************
106* Internal Functions *
107*********************************************************************************************************************************/
108#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
109static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
110#endif
111DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
112DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
113 IEMNATIVEGSTREG enmGstReg, uint32_t off);
114DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
115static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode = false);
116
117
118
119/*********************************************************************************************************************************
120* Native Recompilation *
121*********************************************************************************************************************************/
122
123
124/**
125 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
126 */
127IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
128{
129 pVCpu->iem.s.cInstructions += idxInstr;
130 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
131}
132
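/* Note: the only work done here is bumping the executed-instruction count by
   idxInstr and translating VINF_IEM_REEXEC_BREAK (plain "leave the TB") into
   VINF_SUCCESS before handing the status to iemExecStatusCodeFiddling. */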
133
134/**
135 * Helper for iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb.
136 */
137DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
138{
139 uint64_t fCpu = pVCpu->fLocalForcedActions;
140 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
141 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
142 | VMCPU_FF_TLB_FLUSH
143 | VMCPU_FF_UNHALT );
144 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
145 if (RT_LIKELY( ( !fCpu
146 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
147 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
148 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
149 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
150 return false;
151 return true;
152}
153
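/* Note: the mask above intentionally ignores force-flags that never require
   leaving recompiled code (CR3 sync, TLB flush, unhalt).  APIC/PIC interrupt
   flags only count as pending when EFLAGS.IF is set and no interrupt shadow
   is active; anything else, including any VM-wide force-flag, makes the
   helper report "pending" so the caller breaks out of the TB. */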
154
155/**
156 * Used by TB code to look up the next TB via the TB lookup table, using the GCPhysPc supplied by the emitted code, and link directly to it when possible.
157 */
158template <bool const a_fWithIrqCheck>
159IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
160 uint32_t fFlags, RTGCPHYS GCPhysPc))
161{
162 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
163 Assert(idxTbLookup < pTb->cTbLookupEntries);
164 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
165#if 1
166 PIEMTB const pNewTb = *ppNewTb;
167 if (pNewTb)
168 {
169# ifdef VBOX_STRICT
170 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
171 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
172 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
173 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
174 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
175 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
176# endif
177 if (pNewTb->GCPhysPc == GCPhysPc)
178 {
179# ifdef VBOX_STRICT
180 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
181 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
182 fAssertFlags |= IEMTB_F_INHIBIT_SHADOW;
183 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
184 fAssertFlags |= IEMTB_F_INHIBIT_NMI;
185# if 1 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. */
186 Assert(IEM_F_MODE_X86_IS_FLAT(fFlags));
187# else
188 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
189 {
190 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
191 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
192 fAssertFlags |= IEMTB_F_CS_LIM_CHECKS;
193 }
194# endif
195 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
196 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
197 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
198#endif
199
200 /*
201 * Check them + type.
202 */
203 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
204 {
205 /*
206 * Check for interrupts and stuff.
207 */
208 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
209 * The main problems are the statistics and, to some degree, the logging. :/ */
210 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
211 {
212 /* Do polling. */
213 uint64_t const cTbExecNative = pVCpu->iem.s.cTbExecNative;
214 if ( RT_LIKELY(cTbExecNative & 511)
215 || !TMTimerPollBoolWith32BitMilliTS(pVCpu->CTX_SUFF(pVM), pVCpu, &pVCpu->iem.s.msRecompilerPollNow) )
216 {
217 /*
218 * Success. Update statistics and switch to the next TB.
219 */
220 pVCpu->iem.s.cTbExecNative = cTbExecNative + 1;
221 if (a_fWithIrqCheck)
222 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
223 else
224 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
225
226 pNewTb->cUsed += 1;
227 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
228 pVCpu->iem.s.pCurTbR3 = pNewTb;
229 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
230 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
231 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
232 return (uintptr_t)pNewTb->Native.paInstructions;
233 }
234 }
235 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
236 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
237 }
238 else
239 {
240 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
241 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
242 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
243 }
244 }
245 else
246 {
247 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
248 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
249 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
250 }
251 }
252 else
253 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
254#else
255 NOREF(GCPhysPc);
256#endif
257
258 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
259 return 0;
260}
261
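/* Note: the (cTbExecNative & 511) check above throttles the timer poll to
   roughly one in 512 TB executions.  On success the helper returns the
   address of the next TB's native code so the emitted tail code can
   (presumably) jump straight into it (direct linking); returning 0 sends
   execution down the ordinary exit path instead, with ppTbLookupEntryR3
   left pointing at the lookup slot for next time. */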
262
263/**
264 * Used by TB code to look up the next TB via the TB lookup table, doing the code TLB lookup for the current PC itself, and link directly to it when possible.
265 */
266template <bool const a_fWithIrqCheck>
267IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
268{
269 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
270 Assert(idxTbLookup < pTb->cTbLookupEntries);
271 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
272#if 1
273 PIEMTB const pNewTb = *ppNewTb;
274 if (pNewTb)
275 {
276 /*
277 * Calculate the flags for the next TB and check if they match.
278 */
279 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
280 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
281 { /* likely */ }
282 else
283 {
284 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
285 fFlags |= IEMTB_F_INHIBIT_SHADOW;
286 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
287 fFlags |= IEMTB_F_INHIBIT_NMI;
288 }
289 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
290 {
291 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
292 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
293 { /* likely */ }
294 else
295 fFlags |= IEMTB_F_CS_LIM_CHECKS;
296 }
297 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
298
299 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
300 {
301 /*
302 * Do the TLB lookup for flat RIP and compare the result with the next TB.
303 *
304 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
305 */
306 /* Calc the effective PC. */
307 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
308 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
309 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
310
311 /* Advance within the current buffer (PAGE) when possible. */
312 RTGCPHYS GCPhysPc;
313 uint64_t off;
314 if ( pVCpu->iem.s.pbInstrBuf
315 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
316 {
317 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
318 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
319 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
320 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
321 else
322 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
323 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
324 }
325 else
326 {
327 pVCpu->iem.s.pbInstrBuf = NULL;
328 pVCpu->iem.s.offCurInstrStart = 0;
329 pVCpu->iem.s.offInstrNextByte = 0;
330 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
331 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
332 }
333
334 if (pNewTb->GCPhysPc == GCPhysPc)
335 {
336 /*
337 * Check for interrupts and stuff.
338 */
339 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
340 * The main problems are the statistics and, to some degree, the logging. :/ */
341 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
342 {
343 /* Do polling. */
344 uint64_t const cTbExecNative = pVCpu->iem.s.cTbExecNative;
345 if ( RT_LIKELY(cTbExecNative & 511)
346 || !TMTimerPollBoolWith32BitMilliTS(pVCpu->CTX_SUFF(pVM), pVCpu, &pVCpu->iem.s.msRecompilerPollNow) )
347 {
348 /*
349 * Success. Update statistics and switch to the next TB.
350 */
351 pVCpu->iem.s.cTbExecNative = cTbExecNative + 1;
352 if (a_fWithIrqCheck)
353 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
354 else
355 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
356
357 pNewTb->cUsed += 1;
358 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
359 pVCpu->iem.s.pCurTbR3 = pNewTb;
360 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
361 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
362 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
363 return (uintptr_t)pNewTb->Native.paInstructions;
364 }
365 }
366 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
367 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
368 }
369 else
370 {
371 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
372 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
373 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
374 }
375 }
376 else
377 {
378 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
379 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
380 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
381 }
382 }
383 else
384 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
385#else
386 NOREF(fFlags);
387 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
388#endif
389
390 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
391 return 0;
392}
393
394
395/**
396 * Used by TB code when it wants to raise a \#DE.
397 */
398IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
399{
400 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
401 iemRaiseDivideErrorJmp(pVCpu);
402#ifndef _MSC_VER
403 return VINF_IEM_RAISED_XCPT; /* not reached */
404#endif
405}
406
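/* Note: iemRaiseDivideErrorJmp() and the other *Jmp raise workers never
   return; they longjmp back out of the recompiled code.  The #ifndef
   _MSC_VER return above is presumably only there to keep non-MSC compilers
   quiet about a missing return value without triggering MSC's
   unreachable-code warning; the same pattern repeats in the helpers below. */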
407
408/**
409 * Used by TB code when it wants to raise a \#UD.
410 */
411IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
412{
413 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
414 iemRaiseUndefinedOpcodeJmp(pVCpu);
415#ifndef _MSC_VER
416 return VINF_IEM_RAISED_XCPT; /* not reached */
417#endif
418}
419
420
421/**
422 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
423 *
424 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
425 */
426IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
427{
428 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
429 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
430 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
431 iemRaiseUndefinedOpcodeJmp(pVCpu);
432 else
433 iemRaiseDeviceNotAvailableJmp(pVCpu);
434#ifndef _MSC_VER
435 return VINF_IEM_RAISED_XCPT; /* not reached */
436#endif
437}
438
439
440/**
441 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
442 *
443 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
444 */
445IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
446{
447 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
448 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
449 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
450 iemRaiseUndefinedOpcodeJmp(pVCpu);
451 else
452 iemRaiseDeviceNotAvailableJmp(pVCpu);
453#ifndef _MSC_VER
454 return VINF_IEM_RAISED_XCPT; /* not reached */
455#endif
456}
457
458
459/**
460 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
461 *
462 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
463 */
464IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
465{
466 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
467 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
468 iemRaiseSimdFpExceptionJmp(pVCpu);
469 else
470 iemRaiseUndefinedOpcodeJmp(pVCpu);
471#ifndef _MSC_VER
472 return VINF_IEM_RAISED_XCPT; /* not reached */
473#endif
474}
475
476
477/**
478 * Used by TB code when it wants to raise a \#NM.
479 */
480IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
481{
482 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
483 iemRaiseDeviceNotAvailableJmp(pVCpu);
484#ifndef _MSC_VER
485 return VINF_IEM_RAISED_XCPT; /* not reached */
486#endif
487}
488
489
490/**
491 * Used by TB code when it wants to raise a \#GP(0).
492 */
493IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
494{
495 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
496 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
497#ifndef _MSC_VER
498 return VINF_IEM_RAISED_XCPT; /* not reached */
499#endif
500}
501
502
503/**
504 * Used by TB code when it wants to raise a \#MF.
505 */
506IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
507{
508 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
509 iemRaiseMathFaultJmp(pVCpu);
510#ifndef _MSC_VER
511 return VINF_IEM_RAISED_XCPT; /* not reached */
512#endif
513}
514
515
516/**
517 * Used by TB code when it wants to raise a \#XF.
518 */
519IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
520{
521 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
522 iemRaiseSimdFpExceptionJmp(pVCpu);
523#ifndef _MSC_VER
524 return VINF_IEM_RAISED_XCPT; /* not reached */
525#endif
526}
527
528
529/**
530 * Used by TB code when detecting opcode changes.
531 * @see iemThreadeFuncWorkerObsoleteTb
532 */
533IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
534{
535 /* We set fSafeToFree to false because we're being called in the context
536    of a TB callback function, which for native TBs means we cannot release
537    the executable memory until we've made our way back to iemTbExec, as
538    that return path goes via the native code generated for the TB. */
539 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
540 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
541 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
542 return VINF_IEM_REEXEC_BREAK;
543}
544
545
546/**
547 * Used by TB code when we need to switch to a TB with CS.LIM checking.
548 */
549IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
550{
551 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
552 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
553 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
554 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
555 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
556 return VINF_IEM_REEXEC_BREAK;
557}
558
559
560/**
561 * Used by TB code when we missed a PC check after a branch.
562 */
563IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
564{
565 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
566 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
567 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
568 pVCpu->iem.s.pbInstrBuf));
569 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
570 return VINF_IEM_REEXEC_BREAK;
571}
572
573
574
575/*********************************************************************************************************************************
576* Helpers: Segmented memory fetches and stores. *
577*********************************************************************************************************************************/
578
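/* Note: the helpers in this and the following sections all follow the same
   shape: with IEMNATIVE_WITH_TLB_LOOKUP_FETCH (or _STORE/_PUSH/_POP) defined
   the TLB lookup is emitted inline in the TB and these helpers are only the
   slow path, so they go straight to the *SafeJmp workers; otherwise they
   call the regular *Jmp workers which do the full lookup themselves.
   Roughly (illustrative sketch only, not the actual emitted code):

       value = likely(TLB hit) ? read through the host mapping
                               : iemNativeHlpMemFetchDataUxx(pVCpu, GCPtrMem, iSegReg);
*/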
579/**
580 * Used by TB code to load unsigned 8-bit data w/ segmentation.
581 */
582IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
583{
584#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
585 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
586#else
587 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
588#endif
589}
590
591
592/**
593 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
594 * to 16 bits.
595 */
596IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
597{
598#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
599 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
600#else
601 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
602#endif
603}
604
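/* Note: the cast chain used by the _Sx_ helpers, e.g.
       (uint64_t)(uint16_t)(int16_t)(int8_t)<fetched byte>
   first sign-extends the fetched value to the target width (16 bits here)
   and then zero-extends the result to 64 bits, so the emitted code always
   gets a full 64-bit register image back.  The 16- and 32-bit _Sx_ variants
   further down do the same with wider intermediate types. */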
605
606/**
607 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
608 * to 32 bits.
609 */
610IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
611{
612#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
613 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
614#else
615 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
616#endif
617}
618
619/**
620 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
621 * to 64 bits.
622 */
623IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
624{
625#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
626 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
627#else
628 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
629#endif
630}
631
632
633/**
634 * Used by TB code to load unsigned 16-bit data w/ segmentation.
635 */
636IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
637{
638#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
639 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
640#else
641 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
642#endif
643}
644
645
646/**
647 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
648 * to 32 bits.
649 */
650IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
651{
652#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
653 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
654#else
655 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
656#endif
657}
658
659
660/**
661 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
662 * to 64 bits.
663 */
664IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
665{
666#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
667 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
668#else
669 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
670#endif
671}
672
673
674/**
675 * Used by TB code to load unsigned 32-bit data w/ segmentation.
676 */
677IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
678{
679#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
680 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
681#else
682 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
683#endif
684}
685
686
687/**
688 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
689 * to 64 bits.
690 */
691IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
692{
693#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
694 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
695#else
696 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
697#endif
698}
699
700
701/**
702 * Used by TB code to load unsigned 64-bit data w/ segmentation.
703 */
704IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
705{
706#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
707 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
708#else
709 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
710#endif
711}
712
713
714#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
715/**
716 * Used by TB code to load 128-bit data w/ segmentation.
717 */
718IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
719{
720#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
721 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
722#else
723 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
724#endif
725}
726
727
728/**
729 * Used by TB code to load 128-bit data w/ segmentation.
730 */
731IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
732{
733#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
734 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
735#else
736 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
737#endif
738}
739
740
741/**
742 * Used by TB code to load 128-bit data w/ segmentation.
743 */
744IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
745{
746#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
747 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
748#else
749 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
750#endif
751}
752
753
754/**
755 * Used by TB code to load 256-bit data w/ segmentation.
756 */
757IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
758{
759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
760 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
761#else
762 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
763#endif
764}
765
766
767/**
768 * Used by TB code to load 256-bit data w/ segmentation.
769 */
770IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
771{
772#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
773 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
774#else
775 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
776#endif
777}
778#endif
779
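/* Note: the 128-bit and 256-bit fetch helpers above (and the matching store
   helpers further down) are only compiled in when the SIMD register
   allocator is in use, hence the IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
   bracketing. */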
780
781/**
782 * Used by TB code to store unsigned 8-bit data w/ segmentation.
783 */
784IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
785{
786#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
787 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
788#else
789 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
790#endif
791}
792
793
794/**
795 * Used by TB code to store unsigned 16-bit data w/ segmentation.
796 */
797IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
798{
799#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
800 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
801#else
802 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
803#endif
804}
805
806
807/**
808 * Used by TB code to store unsigned 32-bit data w/ segmentation.
809 */
810IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
811{
812#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
813 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
814#else
815 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
816#endif
817}
818
819
820/**
821 * Used by TB code to store unsigned 64-bit data w/ segmentation.
822 */
823IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
824{
825#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
826 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
827#else
828 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
829#endif
830}
831
832
833#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
834/**
835 * Used by TB code to store unsigned 128-bit data w/ segmentation.
836 */
837IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
838{
839#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
840 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
841#else
842 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
843#endif
844}
845
846
847/**
848 * Used by TB code to store unsigned 128-bit data w/ segmentation.
849 */
850IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
851{
852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
853 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
854#else
855 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
856#endif
857}
858
859
860/**
861 * Used by TB code to store unsigned 256-bit data w/ segmentation.
862 */
863IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
864{
865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
866 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
867#else
868 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
869#endif
870}
871
872
873/**
874 * Used by TB code to store unsigned 256-bit data w/ segmentation.
875 */
876IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
877{
878#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
879 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
880#else
881 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
882#endif
883}
884#endif
885
886
887
888/**
889 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
890 */
891IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
892{
893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
894 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
895#else
896 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
897#endif
898}
899
900
901/**
902 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
903 */
904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
905{
906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
907 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
908#else
909 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
910#endif
911}
912
913
914/**
915 * Used by TB code to store a 32-bit selector value onto a generic stack.
916 *
917 * Intel CPUs don't write the whole dword, hence this special function.
918 */
919IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
920{
921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
922 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
923#else
924 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
925#endif
926}
927
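/* Note: "don't write the whole dword" refers to pushing a segment register
   with a 32-bit operand size: on many Intel CPUs only the low 16 bits of the
   stack slot are written and the upper half is left untouched, which is what
   the dedicated SReg store worker is meant to model. */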
928
929/**
930 * Used by TB code to push unsigned 64-bit value onto a generic stack.
931 */
932IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
933{
934#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
935 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
936#else
937 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
938#endif
939}
940
941
942/**
943 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
944 */
945IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
946{
947#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
948 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
949#else
950 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
951#endif
952}
953
954
955/**
956 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
957 */
958IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
959{
960#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
961 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
962#else
963 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
964#endif
965}
966
967
968/**
969 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
970 */
971IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
972{
973#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
974 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
975#else
976 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
977#endif
978}
979
980
981
982/*********************************************************************************************************************************
983* Helpers: Flat memory fetches and stores. *
984*********************************************************************************************************************************/
985
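/* Note: the flat variants pass UINT8_MAX as the segment register index to
   the shared *SafeJmp workers, which is the IEM convention for "no segment /
   flat linear address"; the non-TLB builds call the dedicated iemMemFlat*Jmp
   workers instead. */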
986/**
987 * Used by TB code to load unsigned 8-bit data w/ flat address.
988 * @note Zero extending the value to 64-bit to simplify assembly.
989 */
990IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
991{
992#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
993 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
994#else
995 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
996#endif
997}
998
999
1000/**
1001 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1002 * to 16 bits.
1003 * @note Zero extending the value to 64-bit to simplify assembly.
1004 */
1005IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1006{
1007#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1008 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1009#else
1010 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1011#endif
1012}
1013
1014
1015/**
1016 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1017 * to 32 bits.
1018 * @note Zero extending the value to 64-bit to simplify assembly.
1019 */
1020IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1021{
1022#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1023 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1024#else
1025 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1026#endif
1027}
1028
1029
1030/**
1031 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1032 * to 64 bits.
1033 */
1034IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1035{
1036#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1037 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1038#else
1039 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1040#endif
1041}
1042
1043
1044/**
1045 * Used by TB code to load unsigned 16-bit data w/ flat address.
1046 * @note Zero extending the value to 64-bit to simplify assembly.
1047 */
1048IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1049{
1050#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1051 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1052#else
1053 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1054#endif
1055}
1056
1057
1058/**
1059 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1060 * to 32 bits.
1061 * @note Zero extending the value to 64-bit to simplify assembly.
1062 */
1063IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1064{
1065#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1066 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1067#else
1068 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1069#endif
1070}
1071
1072
1073/**
1074 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1075 * to 64 bits.
1076 * @note Zero extending the value to 64-bit to simplify assembly.
1077 */
1078IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1079{
1080#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1081 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1082#else
1083 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1084#endif
1085}
1086
1087
1088/**
1089 * Used by TB code to load unsigned 32-bit data w/ flat address.
1090 * @note Zero extending the value to 64-bit to simplify assembly.
1091 */
1092IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1093{
1094#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1095 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1096#else
1097 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1098#endif
1099}
1100
1101
1102/**
1103 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1104 * to 64 bits.
1105 * @note Zero extending the value to 64-bit to simplify assembly.
1106 */
1107IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1108{
1109#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1110 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1111#else
1112 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1113#endif
1114}
1115
1116
1117/**
1118 * Used by TB code to load unsigned 64-bit data w/ flat address.
1119 */
1120IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1121{
1122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1123 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1124#else
1125 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1126#endif
1127}
1128
1129
1130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1131/**
1132 * Used by TB code to load unsigned 128-bit data w/ flat address.
1133 */
1134IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1135{
1136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1137 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1138#else
1139 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1140#endif
1141}
1142
1143
1144/**
1145 * Used by TB code to load unsigned 128-bit data w/ flat address.
1146 */
1147IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1148{
1149#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1150 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1151#else
1152 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1153#endif
1154}
1155
1156
1157/**
1158 * Used by TB code to load unsigned 128-bit data w/ flat address.
1159 */
1160IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1161{
1162#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1163 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1164#else
1165 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1166#endif
1167}
1168
1169
1170/**
1171 * Used by TB code to load unsigned 256-bit data w/ flat address.
1172 */
1173IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1174{
1175#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1176 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1177#else
1178 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1179#endif
1180}
1181
1182
1183/**
1184 * Used by TB code to load unsigned 256-bit data w/ flat address.
1185 */
1186IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1187{
1188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1189 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1190#else
1191 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1192#endif
1193}
1194#endif
1195
1196
1197/**
1198 * Used by TB code to store unsigned 8-bit data w/ flat address.
1199 */
1200IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1201{
1202#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1203 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1204#else
1205 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1206#endif
1207}
1208
1209
1210/**
1211 * Used by TB code to store unsigned 16-bit data w/ flat address.
1212 */
1213IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1214{
1215#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1216 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1217#else
1218 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1219#endif
1220}
1221
1222
1223/**
1224 * Used by TB code to store unsigned 32-bit data w/ flat address.
1225 */
1226IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1227{
1228#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1229 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1230#else
1231 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1232#endif
1233}
1234
1235
1236/**
1237 * Used by TB code to store unsigned 64-bit data w/ flat address.
1238 */
1239IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1240{
1241#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1242 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1243#else
1244 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1245#endif
1246}
1247
1248
1249#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1250/**
1251 * Used by TB code to store unsigned 128-bit data w/ flat address.
1252 */
1253IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1254{
1255#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1256 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1257#else
1258 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1259#endif
1260}
1261
1262
1263/**
1264 * Used by TB code to store unsigned 128-bit data w/ flat address.
1265 */
1266IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1267{
1268#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1269 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1270#else
1271 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1272#endif
1273}
1274
1275
1276/**
1277 * Used by TB code to store unsigned 256-bit data w/ flat address.
1278 */
1279IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1280{
1281#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1282 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1283#else
1284 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1285#endif
1286}
1287
1288
1289/**
1290 * Used by TB code to store unsigned 256-bit data w/ flat address.
1291 */
1292IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1293{
1294#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1295 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1296#else
1297 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1298#endif
1299}
1300#endif
1301
1302
1303
1304/**
1305 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1306 */
1307IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1308{
1309#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1310 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1311#else
1312 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1313#endif
1314}
1315
1316
1317/**
1318 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1319 */
1320IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1321{
1322#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1323 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1324#else
1325 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1326#endif
1327}
1328
1329
1330/**
1331 * Used by TB code to store a segment selector value onto a flat stack.
1332 *
1333 * Intel CPUs don't write the whole dword, hence this special function.
1334 */
1335IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1336{
1337#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1338 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1339#else
1340 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1341#endif
1342}
1343
1344
1345/**
1346 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1347 */
1348IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1349{
1350#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1351 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1352#else
1353 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1354#endif
1355}
1356
1357
1358/**
1359 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1360 */
1361IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1362{
1363#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1364 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1365#else
1366 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1367#endif
1368}
1369
1370
1371/**
1372 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1373 */
1374IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1375{
1376#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1377 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1378#else
1379 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1380#endif
1381}
1382
1383
1384/**
1385 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1386 */
1387IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1388{
1389#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1390 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1391#else
1392 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1393#endif
1394}
1395
1396
1397
1398/*********************************************************************************************************************************
1399* Helpers: Segmented memory mapping. *
1400*********************************************************************************************************************************/
1401
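/* Note: each mapping helper returns a host pointer to the guest data (either
   a direct guest page mapping or a bounce buffer) and stores the unmap
   cookie in *pbUnmapInfo for the corresponding unmap/commit helper to use
   later.  The Atomic/Rw/Wo/Ro suffixes select the requested access mode. */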
1402/**
1403 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1404 * segmentation.
1405 */
1406IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1407 RTGCPTR GCPtrMem, uint8_t iSegReg))
1408{
1409#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1410 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1411#else
1412 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1413#endif
1414}
1415
1416
1417/**
1418 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1419 */
1420IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1421 RTGCPTR GCPtrMem, uint8_t iSegReg))
1422{
1423#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1424 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1425#else
1426 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1427#endif
1428}
1429
1430
1431/**
1432 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1433 */
1434IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1435 RTGCPTR GCPtrMem, uint8_t iSegReg))
1436{
1437#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1438 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1439#else
1440 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1441#endif
1442}
1443
1444
1445/**
1446 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1447 */
1448IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1449 RTGCPTR GCPtrMem, uint8_t iSegReg))
1450{
1451#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1452 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1453#else
1454 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1455#endif
1456}
1457
1458
1459/**
1460 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1461 * segmentation.
1462 */
1463IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1464 RTGCPTR GCPtrMem, uint8_t iSegReg))
1465{
1466#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1467 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1468#else
1469 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1470#endif
1471}
1472
1473
1474/**
1475 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1476 */
1477IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1478 RTGCPTR GCPtrMem, uint8_t iSegReg))
1479{
1480#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1481 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1482#else
1483 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1484#endif
1485}
1486
1487
1488/**
1489 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1490 */
1491IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1492 RTGCPTR GCPtrMem, uint8_t iSegReg))
1493{
1494#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1495 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1496#else
1497 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1498#endif
1499}
1500
1501
1502/**
1503 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1504 */
1505IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1506 RTGCPTR GCPtrMem, uint8_t iSegReg))
1507{
1508#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1509 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1510#else
1511 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1512#endif
1513}
1514
1515
1516/**
1517 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1518 * segmentation.
1519 */
1520IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1521 RTGCPTR GCPtrMem, uint8_t iSegReg))
1522{
1523#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1524 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1525#else
1526 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1527#endif
1528}
1529
1530
1531/**
1532 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1533 */
1534IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1535 RTGCPTR GCPtrMem, uint8_t iSegReg))
1536{
1537#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1538 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1539#else
1540 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1541#endif
1542}
1543
1544
1545/**
1546 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1547 */
1548IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1549 RTGCPTR GCPtrMem, uint8_t iSegReg))
1550{
1551#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1552 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1553#else
1554 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1555#endif
1556}
1557
1558
1559/**
1560 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1561 */
1562IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1563 RTGCPTR GCPtrMem, uint8_t iSegReg))
1564{
1565#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1566 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1567#else
1568 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1569#endif
1570}
1571
1572
1573/**
1574 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1575 * segmentation.
1576 */
1577IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1578 RTGCPTR GCPtrMem, uint8_t iSegReg))
1579{
1580#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1581 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1582#else
1583 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1584#endif
1585}
1586
1587
1588/**
1589 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1590 */
1591IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1592 RTGCPTR GCPtrMem, uint8_t iSegReg))
1593{
1594#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1595 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1596#else
1597 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1598#endif
1599}
1600
1601
1602/**
1603 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1604 */
1605IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1606 RTGCPTR GCPtrMem, uint8_t iSegReg))
1607{
1608#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1609 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1610#else
1611 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1612#endif
1613}
1614
1615
1616/**
1617 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1618 */
1619IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1620 RTGCPTR GCPtrMem, uint8_t iSegReg))
1621{
1622#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1623 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1624#else
1625 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1626#endif
1627}
1628
1629
1630/**
1631 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1632 */
1633IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1634 RTGCPTR GCPtrMem, uint8_t iSegReg))
1635{
1636#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1637 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1638#else
1639 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1640#endif
1641}
1642
1643
1644/**
1645 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1646 */
1647IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1648 RTGCPTR GCPtrMem, uint8_t iSegReg))
1649{
1650#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1651 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1652#else
1653 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1654#endif
1655}
1656
1657
1658/**
1659 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1660 * segmentation.
1661 */
1662IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1663 RTGCPTR GCPtrMem, uint8_t iSegReg))
1664{
1665#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1666 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1667#else
1668 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1669#endif
1670}
1671
1672
1673/**
1674 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1675 */
1676IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1677 RTGCPTR GCPtrMem, uint8_t iSegReg))
1678{
1679#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1680 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1681#else
1682 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1683#endif
1684}
1685
1686
1687/**
1688 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1689 */
1690IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1691 RTGCPTR GCPtrMem, uint8_t iSegReg))
1692{
1693#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1694 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1695#else
1696 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1697#endif
1698}
1699
1700
1701/**
1702 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1703 */
1704IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1705 RTGCPTR GCPtrMem, uint8_t iSegReg))
1706{
1707#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1708 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1709#else
1710 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1711#endif
1712}
1713
1714
1715/*********************************************************************************************************************************
1716* Helpers: Flat memory mapping. *
1717*********************************************************************************************************************************/
1718
1719/**
1720 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1721 * address.
1722 */
1723IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1724{
1725#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1726 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1727#else
1728 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1729#endif
1730}
1731
1732
1733/**
1734 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1735 */
1736IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1737{
1738#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1739 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1740#else
1741 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1742#endif
1743}
1744
1745
1746/**
1747 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1748 */
1749IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1750{
1751#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1752 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1753#else
1754 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1755#endif
1756}
1757
1758
1759/**
1760 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1761 */
1762IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1763{
1764#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1765 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1766#else
1767 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1768#endif
1769}
1770
1771
1772/**
1773 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1774 * address.
1775 */
1776IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1777{
1778#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1779 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1780#else
1781 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1782#endif
1783}
1784
1785
1786/**
1787 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1788 */
1789IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1790{
1791#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1792 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1793#else
1794 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1795#endif
1796}
1797
1798
1799/**
1800 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1801 */
1802IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1803{
1804#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1805 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1806#else
1807 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1808#endif
1809}
1810
1811
1812/**
1813 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1814 */
1815IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1816{
1817#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1818 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1819#else
1820 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1821#endif
1822}
1823
1824
1825/**
1826 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1827 * address.
1828 */
1829IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1830{
1831#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1832 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1833#else
1834 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1835#endif
1836}
1837
1838
1839/**
1840 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1841 */
1842IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1843{
1844#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1845 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1846#else
1847 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1848#endif
1849}
1850
1851
1852/**
1853 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1854 */
1855IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1856{
1857#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1858 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1859#else
1860 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1861#endif
1862}
1863
1864
1865/**
1866 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1867 */
1868IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1869{
1870#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1871 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1872#else
1873 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1874#endif
1875}
1876
1877
1878/**
1879 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1880 * address.
1881 */
1882IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1883{
1884#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1885 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1886#else
1887 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1888#endif
1889}
1890
1891
1892/**
1893 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1894 */
1895IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1896{
1897#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1898 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1899#else
1900 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1901#endif
1902}
1903
1904
1905/**
1906 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1907 */
1908IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1909{
1910#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1911 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1912#else
1913 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1914#endif
1915}
1916
1917
1918/**
1919 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1920 */
1921IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1922{
1923#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1924 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1925#else
1926 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1927#endif
1928}
1929
1930
1931/**
1932 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1933 */
1934IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1935{
1936#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1937 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1938#else
1939 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1940#endif
1941}
1942
1943
1944/**
1945 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1946 */
1947IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1948{
1949#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1950 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1951#else
1952 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1953#endif
1954}
1955
1956
1957/**
1958 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1959 * address.
1960 */
1961IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1962{
1963#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1964 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1965#else
1966 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1967#endif
1968}
1969
1970
1971/**
1972 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1973 */
1974IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1975{
1976#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1977 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1978#else
1979 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1980#endif
1981}
1982
1983
1984/**
1985 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1986 */
1987IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1988{
1989#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1990 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1991#else
1992 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1993#endif
1994}
1995
1996
1997/**
1998 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1999 */
2000IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2001{
2002#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2003 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2004#else
2005 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2006#endif
2007}
2008
2009
2010/*********************************************************************************************************************************
2011* Helpers: Commit, rollback & unmap *
2012*********************************************************************************************************************************/
2013
2014/**
2015 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2016 */
2017IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2018{
2019 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2020}
2021
2022
2023/**
2024 * Used by TB code to commit and unmap a read-write memory mapping.
2025 */
2026IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2027{
2028 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2029}
2030
2031
2032/**
2033 * Used by TB code to commit and unmap a write-only memory mapping.
2034 */
2035IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2036{
2037 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2038}
2039
2040
2041/**
2042 * Used by TB code to commit and unmap a read-only memory mapping.
2043 */
2044IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2045{
2046 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2047}
2048
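/*
 * Pairing sketch (illustrative only, never compiled): TB code is expected to
 * call one of the map helpers above, access the returned pointer, and then
 * hand the bUnmapInfo cookie back to the matching commit-and-unmap helper.
 * The local variable names below are made up for the example.
 */
#if 0
uint8_t   bUnmapInfo = 0;
uint32_t *pu32Dst    = iemNativeHlpMemFlatMapDataU32Wo(pVCpu, &bUnmapInfo, GCPtrMem);
*pu32Dst = uValue;                                  /* the actual store */
iemNativeHlpMemCommitAndUnmapWo(pVCpu, bUnmapInfo);
#endif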
2049
2050/**
2051 * Reinitializes the native recompiler state.
2052 *
2053 * Called before starting a new recompile job.
2054 */
2055static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2056{
2057 pReNative->cLabels = 0;
2058 pReNative->bmLabelTypes = 0;
2059 pReNative->cFixups = 0;
2060#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2061 pReNative->cTbExitFixups = 0;
2062#endif
2063#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2064 pReNative->pDbgInfo->cEntries = 0;
2065 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2066#endif
2067 pReNative->pTbOrg = pTb;
2068 pReNative->cCondDepth = 0;
2069 pReNative->uCondSeqNo = 0;
2070 pReNative->uCheckIrqSeqNo = 0;
2071 pReNative->uTlbSeqNo = 0;
2072
2073#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2074 pReNative->Core.offPc = 0;
2075 pReNative->Core.cInstrPcUpdateSkipped = 0;
2076#endif
2077#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2078 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2079#endif
2080 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2081#if IEMNATIVE_HST_GREG_COUNT < 32
2082 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2083#endif
2084 ;
2085 pReNative->Core.bmHstRegsWithGstShadow = 0;
2086 pReNative->Core.bmGstRegShadows = 0;
2087#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2088 pReNative->Core.bmGstRegShadowDirty = 0;
2089#endif
2090 pReNative->Core.bmVars = 0;
2091 pReNative->Core.bmStack = 0;
2092 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2093 pReNative->Core.u64ArgVars = UINT64_MAX;
2094
2095 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 22);
2096 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2097 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2098 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2099 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2100 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2101 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2102 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2103 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2104 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2105 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2106 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2107 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2108 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2110 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2111 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2112 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2113 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2114 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2115 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2116 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2117 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2118
2119 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2120
2121 /* Full host register reinit: */
2122 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2123 {
2124 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2125 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2126 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2127 }
2128
2129 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2130 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2131#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2132 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2133#endif
2134#ifdef IEMNATIVE_REG_FIXED_TMP0
2135 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2136#endif
2137#ifdef IEMNATIVE_REG_FIXED_TMP1
2138 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2139#endif
2140#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2141 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2142#endif
2143 );
2144 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2145 {
2146 fRegs &= ~RT_BIT_32(idxReg);
2147 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2148 }
2149
2150 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2151#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2152 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2153#endif
2154#ifdef IEMNATIVE_REG_FIXED_TMP0
2155 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2156#endif
2157#ifdef IEMNATIVE_REG_FIXED_TMP1
2158 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2159#endif
2160#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2161 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2162#endif
2163
2164#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2165 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2166# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2167 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2168# endif
2169 ;
2170 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2171 pReNative->Core.bmGstSimdRegShadows = 0;
2172 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2173 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2174
2175 /* Full host register reinit: */
2176 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2177 {
2178 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2179 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2180 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2181 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2182 }
2183
2184 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2185 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2186 {
2187 fRegs &= ~RT_BIT_32(idxReg);
2188 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2189 }
2190
2191#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2192 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2193#endif
2194
2195#endif
2196
2197 return pReNative;
2198}
2199
2200
2201/**
2202 * Used when done emitting the per-chunk code and for iemNativeInit bailout.
2203 */
2204static void iemNativeTerm(PIEMRECOMPILERSTATE pReNative)
2205{
2206 RTMemFree(pReNative->pInstrBuf);
2207 RTMemFree(pReNative->paLabels);
2208 RTMemFree(pReNative->paFixups);
2209#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2210 RTMemFree(pReNative->paTbExitFixups);
2211#endif
2212#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2213 RTMemFree(pReNative->pDbgInfo);
2214#endif
2215 RTMemFree(pReNative);
2216}
2217
2218
2219/**
2220 * Allocates and initializes the native recompiler state.
2221 *
2222 * This is called the first time an EMT wants to recompile something.
2223 *
2224 * @returns Pointer to the new recompiler state.
2225 * @param pVCpu The cross context virtual CPU structure of the calling
2226 * thread.
2227 * @param pTb The TB that's about to be recompiled. When this is NULL,
2228 * the recompiler state is for emitting the common per-chunk
2229 * code from iemNativeRecompileAttachExecMemChunkCtx.
2230 * @thread EMT(pVCpu)
2231 */
2232static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2233{
2234 VMCPU_ASSERT_EMT(pVCpu);
2235
2236 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2237 AssertReturn(pReNative, NULL);
2238
2239 /*
2240 * Try allocate all the buffers and stuff we need.
2241 */
2242 uint32_t const cFactor = pTb ? 1 : 32 /* per-chunk stuff doesn't really need anything but the code buffer */;
2243 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2244 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K / cFactor);
2245 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K / cFactor);
2246#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2247 pReNative->paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemAllocZ(sizeof(IEMNATIVEEXITFIXUP) * _8K / cFactor);
2248#endif
2249#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2250 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K / cFactor]));
2251#endif
2252 if (RT_LIKELY( pReNative->pInstrBuf
2253 && pReNative->paLabels
2254 && pReNative->paFixups)
2255#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2256 && pReNative->paTbExitFixups
2257#endif
2258#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2259 && pReNative->pDbgInfo
2260#endif
2261 )
2262 {
2263 /*
2264 * Set the buffer & array sizes on success.
2265 */
2266 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2267 pReNative->cLabelsAlloc = _8K / cFactor;
2268 pReNative->cFixupsAlloc = _16K / cFactor;
2269#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2270 pReNative->cTbExitFixupsAlloc = _8K / cFactor;
2271#endif
2272#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2273 pReNative->cDbgInfoAlloc = _16K / cFactor;
2274#endif
2275
2276 /* Other constant stuff: */
2277 pReNative->pVCpu = pVCpu;
2278
2279 /*
2280 * Done, just reinit it.
2281 */
2282 return iemNativeReInit(pReNative, pTb);
2283 }
2284
2285 /*
2286 * Failed. Cleanup and return.
2287 */
2288 AssertFailed();
2289 iemNativeTerm(pReNative);
2290 return NULL;
2291}
2292
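/*
 * Lifecycle sketch (illustrative only, never compiled): each EMT allocates the
 * state once via iemNativeInit() and then recycles it with iemNativeReInit()
 * for every TB it recompiles.  The pNativeRecompilerStateR3 member name is an
 * assumption made for this sketch.
 */
#if 0
PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3; /* assumed member */
if (!pReNative)
    pReNative = iemNativeInit(pVCpu, pTb);
else
    pReNative = iemNativeReInit(pReNative, pTb);
#endif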
2293
2294/**
2295 * Creates a label
2296 *
2297 * If the label does not yet have a defined position,
2298 * call iemNativeLabelDefine() later to set it.
2299 *
2300 * @returns Label ID. Throws VBox status code on failure, so no need to check
2301 * the return value.
2302 * @param pReNative The native recompile state.
2303 * @param enmType The label type.
2304 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2305 * label is not yet defined (default).
2306 * @param uData Data associated with the label. Only applicable to
2307 * certain types of labels. Default is zero.
2308 */
2309DECL_HIDDEN_THROW(uint32_t)
2310iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2311 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2312{
2313 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2314#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
2315 Assert(enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2316#endif
2317
2318 /*
2319 * Locate existing label definition.
2320 *
2321 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2322 * and uData is zero.
2323 */
2324 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2325 uint32_t const cLabels = pReNative->cLabels;
2326 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2327#ifndef VBOX_STRICT
2328 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2329 && offWhere == UINT32_MAX
2330 && uData == 0
2331#endif
2332 )
2333 {
2334#ifndef VBOX_STRICT
2335 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2336 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2337 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2338 if (idxLabel < pReNative->cLabels)
2339 return idxLabel;
2340#else
2341 for (uint32_t i = 0; i < cLabels; i++)
2342 if ( paLabels[i].enmType == enmType
2343 && paLabels[i].uData == uData)
2344 {
2345 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2346 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2347 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2348 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2349 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2350 return i;
2351 }
2352 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2353 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2354#endif
2355 }
2356
2357 /*
2358 * Make sure we've got room for another label.
2359 */
2360 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2361 { /* likely */ }
2362 else
2363 {
2364 uint32_t cNew = pReNative->cLabelsAlloc;
2365 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2366 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2367 cNew *= 2;
2368 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
2369 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2370 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2371 pReNative->paLabels = paLabels;
2372 pReNative->cLabelsAlloc = cNew;
2373 }
2374
2375 /*
2376 * Define a new label.
2377 */
2378 paLabels[cLabels].off = offWhere;
2379 paLabels[cLabels].enmType = enmType;
2380 paLabels[cLabels].uData = uData;
2381 pReNative->cLabels = cLabels + 1;
2382
2383 Assert((unsigned)enmType < 64);
2384 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2385
2386 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2387 {
2388 Assert(uData == 0);
2389 pReNative->aidxUniqueLabels[enmType] = cLabels;
2390 }
2391
2392 if (offWhere != UINT32_MAX)
2393 {
2394#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2395 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2396 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2397#endif
2398 }
2399 return cLabels;
2400}
2401
2402
2403/**
2404 * Defines the location of an existing label.
2405 *
2406 * @param pReNative The native recompile state.
2407 * @param idxLabel The label to define.
2408 * @param offWhere The position.
2409 */
2410DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2411{
2412 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2413 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2414 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2415 pLabel->off = offWhere;
2416#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2417 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2418 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2419#endif
2420}
2421
2422
2423#if !defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) || !defined(RT_ARCH_AMD64)
2424/**
2425 * Looks up a label.
2426 *
2427 * @returns Label ID if found, UINT32_MAX if not.
2428 */
2429static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2430 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2431{
2432 Assert((unsigned)enmType < 64);
2433 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2434 {
2435 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2436 return pReNative->aidxUniqueLabels[enmType];
2437
2438 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2439 uint32_t const cLabels = pReNative->cLabels;
2440 for (uint32_t i = 0; i < cLabels; i++)
2441 if ( paLabels[i].enmType == enmType
2442 && paLabels[i].uData == uData
2443 && ( paLabels[i].off == offWhere
2444 || offWhere == UINT32_MAX
2445 || paLabels[i].off == UINT32_MAX))
2446 return i;
2447 }
2448 return UINT32_MAX;
2449}
2450#endif
2451
2452
2453/**
2454 * Adds a fixup.
2455 *
2456 * @throws VBox status code (int) on failure.
2457 * @param pReNative The native recompile state.
2458 * @param offWhere The instruction offset of the fixup location.
2459 * @param idxLabel The target label ID for the fixup.
2460 * @param enmType The fixup type.
2461 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2462 */
2463DECL_HIDDEN_THROW(void)
2464iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2465 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2466{
2467 Assert(idxLabel <= UINT16_MAX);
2468 Assert((unsigned)enmType <= UINT8_MAX);
2469#ifdef RT_ARCH_ARM64
2470 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2471 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2472 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2473#endif
2474
2475 /*
2476 * Make sure we've got room.
2477 */
2478 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2479 uint32_t const cFixups = pReNative->cFixups;
2480 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2481 { /* likely */ }
2482 else
2483 {
2484 uint32_t cNew = pReNative->cFixupsAlloc;
2485 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2486 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2487 cNew *= 2;
2488 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2489 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2490 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2491 pReNative->paFixups = paFixups;
2492 pReNative->cFixupsAlloc = cNew;
2493 }
2494
2495 /*
2496 * Add the fixup.
2497 */
2498 paFixups[cFixups].off = offWhere;
2499 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2500 paFixups[cFixups].enmType = enmType;
2501 paFixups[cFixups].offAddend = offAddend;
2502 pReNative->cFixups = cFixups + 1;
2503}
2504
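/*
 * Usage sketch (illustrative only, never compiled): a forward label is created
 * without a position, the branch referencing it gets a fixup, and the label is
 * defined once its target offset is known.  The label and fixup type values
 * are placeholders, not a statement about which ones a given emitter uses.
 */
#if 0
uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType /* placeholder */);
/* ... emit the branch instruction and remember the offset of its immediate ... */
iemNativeAddFixup(pReNative, offBranchImm, idxLabel, enmFixupType /* placeholder */);
/* ... later, when code generation reaches the branch target: */
iemNativeLabelDefine(pReNative, idxLabel, off);
#endif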
2505
2506#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2507/**
2508 * Adds a fixup to the per chunk tail code.
2509 *
2510 * @throws VBox status code (int) on failure.
2511 * @param pReNative The native recompile state.
2512 * @param offWhere The instruction offset of the fixup location.
2513 * @param enmExitReason The exit reason to jump to.
2514 */
2515DECL_HIDDEN_THROW(void)
2516iemNativeAddTbExitFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, IEMNATIVEEXITREASON enmExitReason)
2517{
2518 /*
2519 * Make sure we've got room.
2520 */
2521 PIEMNATIVEEXITFIXUP paTbExitFixups = pReNative->paTbExitFixups;
2522 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
2523 if (RT_LIKELY(cTbExitFixups < pReNative->cTbExitFixupsAlloc))
2524 { /* likely */ }
2525 else
2526 {
2527 uint32_t cNew = pReNative->cTbExitFixupsAlloc;
2528 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2529 AssertStmt(cTbExitFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2530 cNew *= 2;
2531 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2532 paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemRealloc(paTbExitFixups, cNew * sizeof(paTbExitFixups[0]));
2533 AssertStmt(paTbExitFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2534 pReNative->paTbExitFixups = paTbExitFixups;
2535 pReNative->cTbExitFixupsAlloc = cNew;
2536 }
2537
2538 /*
2539 * Add the fixup.
2540 */
2541 paTbExitFixups[cTbExitFixups].off = offWhere;
2542 paTbExitFixups[cTbExitFixups].enmExitReason = (uint32_t)enmExitReason;
2543 pReNative->cTbExitFixups = cTbExitFixups + 1;
2544}
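/*
 * Usage sketch (illustrative only, never compiled): instead of targeting a
 * label, an emitter records the exit reason for the branch it just emitted and
 * lets the per-chunk tail code resolution patch it up later.  The exit reason
 * value is a placeholder.
 */
# if 0
iemNativeAddTbExitFixup(pReNative, offBranchImm, enmExitReason /* placeholder */);
# endif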
2545#endif
2546
2547
2548/**
2549 * Slow code path for iemNativeInstrBufEnsure.
2550 */
2551DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2552{
2553 /* Double the buffer size till we meet the request. */
2554 uint32_t cNew = pReNative->cInstrBufAlloc;
2555 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2556 do
2557 cNew *= 2;
2558 while (cNew < off + cInstrReq);
2559
2560 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2561#ifdef RT_ARCH_ARM64
2562 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2563#else
2564 uint32_t const cbMaxInstrBuf = _2M;
2565#endif
2566 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2567
2568 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2569 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2570
2571#ifdef VBOX_STRICT
2572 pReNative->offInstrBufChecked = off + cInstrReq;
2573#endif
2574 pReNative->cInstrBufAlloc = cNew;
2575 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2576}
2577
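/*
 * Caller pattern sketch (illustrative only, never compiled): emitters call the
 * inline iemNativeInstrBufEnsure() fast path, which drops into this slow path
 * when the requested room isn't there.  IEMNATIVEINSTR is assumed here to be a
 * byte on AMD64 and a 32-bit word on ARM64, hence the two variants below.
 */
#if 0
PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1 /*cInstrReq*/);
# ifdef RT_ARCH_AMD64
pCodeBuf[off++] = 0x90;                 /* nop */
# elif defined(RT_ARCH_ARM64)
pCodeBuf[off++] = UINT32_C(0xd503201f); /* nop */
# endif
#endif
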
2578#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2579
2580/**
2581 * Grows the static debug info array used during recompilation.
2582 *
2583 * @returns Pointer to the new debug info block; throws VBox status code on
2584 * failure, so no need to check the return value.
2585 */
2586DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2587{
2588 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2589 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2590 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2591 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2592 pReNative->pDbgInfo = pDbgInfo;
2593 pReNative->cDbgInfoAlloc = cNew;
2594 return pDbgInfo;
2595}
2596
2597
2598/**
2599 * Adds a new, uninitialized debug info entry, returning the pointer to it.
2600 */
2601DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2602{
2603 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2604 { /* likely */ }
2605 else
2606 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2607 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2608}
2609
2610
2611/**
2612 * Debug Info: Adds a native offset record, if necessary.
2613 */
2614DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2615{
2616 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2617
2618 /*
2619 * Do we need this one?
2620 */
2621 uint32_t const offPrev = pDbgInfo->offNativeLast;
2622 if (offPrev == off)
2623 return;
2624 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2625
2626 /*
2627 * Add it.
2628 */
2629 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2630 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2631 pEntry->NativeOffset.offNative = off;
2632 pDbgInfo->offNativeLast = off;
2633}
2634
2635
2636/**
2637 * Debug Info: Record info about a label.
2638 */
2639static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2640{
2641 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2642 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2643 pEntry->Label.uUnused = 0;
2644 pEntry->Label.enmLabel = (uint8_t)enmType;
2645 pEntry->Label.uData = uData;
2646}
2647
2648
2649/**
2650 * Debug Info: Record info about a threaded call.
2651 */
2652static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2653{
2654 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2655 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2656 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2657 pEntry->ThreadedCall.uUnused = 0;
2658 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2659}
2660
2661
2662/**
2663 * Debug Info: Record info about a new guest instruction.
2664 */
2665static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2666{
2667 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2668 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2669 pEntry->GuestInstruction.uUnused = 0;
2670 pEntry->GuestInstruction.fExec = fExec;
2671}
2672
2673
2674/**
2675 * Debug Info: Record info about guest register shadowing.
2676 */
2677DECL_HIDDEN_THROW(void)
2678iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2679 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2680{
2681 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2682 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2683 pEntry->GuestRegShadowing.uUnused = 0;
2684 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2685 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2686 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2687#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2688 Assert( idxHstReg != UINT8_MAX
2689 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2690#endif
2691}
2692
2693
2694# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2695/**
2696 * Debug Info: Record info about guest SIMD register shadowing.
2697 */
2698DECL_HIDDEN_THROW(void)
2699iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2700 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2701{
2702 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2703 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2704 pEntry->GuestSimdRegShadowing.uUnused = 0;
2705 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2706 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2707 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2708}
2709# endif
2710
2711
2712# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2713/**
2714 * Debug Info: Record info about delayed RIP updates.
2715 */
2716DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
2717{
2718 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2719 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2720 pEntry->DelayedPcUpdate.offPc = offPc;
2721 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2722}
2723# endif
2724
2725# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2726
2727/**
2728 * Debug Info: Record info about a dirty guest register.
2729 */
2730DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2731 uint8_t idxGstReg, uint8_t idxHstReg)
2732{
2733 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2734 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2735 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2736 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2737 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2738}
2739
2740
2741/**
2742 * Debug Info: Record info about a dirty guest register writeback operation.
2743 */
2744DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2745{
2746 unsigned const cBitsGstRegMask = 25;
2747 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2748
2749 /* The first block of 25 bits: */
2750 if (fGstReg & fGstRegMask)
2751 {
2752 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2753 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2754 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2755 pEntry->GuestRegWriteback.cShift = 0;
2756 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2757 fGstReg &= ~(uint64_t)fGstRegMask;
2758 if (!fGstReg)
2759 return;
2760 }
2761
2762 /* The second block of 25 bits: */
2763 fGstReg >>= cBitsGstRegMask;
2764 if (fGstReg & fGstRegMask)
2765 {
2766 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2767 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2768 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2769 pEntry->GuestRegWriteback.cShift = 1;
2770 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2771 fGstReg &= ~(uint64_t)fGstRegMask;
2772 if (!fGstReg)
2773 return;
2774 }
2775
2776 /* The last block with 14 bits: */
2777 fGstReg >>= cBitsGstRegMask;
2778 Assert(fGstReg & fGstRegMask);
2779 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2780 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2781 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2782 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2783 pEntry->GuestRegWriteback.cShift = 2;
2784 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2785}
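
/*
 * Decoding sketch (illustrative only, never compiled): a consumer of the debug
 * info can rebuild the 64-bit mask by OR-ing each entry back in at its block
 * position; this assumes cShift counts 25-bit blocks, matching the encoder
 * above.
 */
#  if 0
fGstReg |= (uint64_t)pEntry->GuestRegWriteback.fGstReg << (pEntry->GuestRegWriteback.cShift * 25);
#  endif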
2786
2787# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2788
2789#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2790
2791
2792/*********************************************************************************************************************************
2793* Register Allocator *
2794*********************************************************************************************************************************/
2795
2796/**
2797 * Register parameter indexes (indexed by argument number).
2798 */
2799DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2800{
2801 IEMNATIVE_CALL_ARG0_GREG,
2802 IEMNATIVE_CALL_ARG1_GREG,
2803 IEMNATIVE_CALL_ARG2_GREG,
2804 IEMNATIVE_CALL_ARG3_GREG,
2805#if defined(IEMNATIVE_CALL_ARG4_GREG)
2806 IEMNATIVE_CALL_ARG4_GREG,
2807# if defined(IEMNATIVE_CALL_ARG5_GREG)
2808 IEMNATIVE_CALL_ARG5_GREG,
2809# if defined(IEMNATIVE_CALL_ARG6_GREG)
2810 IEMNATIVE_CALL_ARG6_GREG,
2811# if defined(IEMNATIVE_CALL_ARG7_GREG)
2812 IEMNATIVE_CALL_ARG7_GREG,
2813# endif
2814# endif
2815# endif
2816#endif
2817};
2818AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2819
2820/**
2821 * Call register masks indexed by argument count.
2822 */
2823DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2824{
2825 0,
2826 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2827 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2828 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2829 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2830 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2831#if defined(IEMNATIVE_CALL_ARG4_GREG)
2832 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2833 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2834# if defined(IEMNATIVE_CALL_ARG5_GREG)
2835 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2836 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2837# if defined(IEMNATIVE_CALL_ARG6_GREG)
2838 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2839 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2840 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2841# if defined(IEMNATIVE_CALL_ARG7_GREG)
2842 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2843 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2844 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2845# endif
2846# endif
2847# endif
2848#endif
2849};
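
/*
 * Indexing sketch (illustrative only, never compiled): argument number 2 lives
 * in g_aidxIemNativeCallRegs[2], and the registers occupied by a three
 * argument call are given by g_afIemNativeCallRegs[3].
 */
#if 0
uint8_t  const idxArg2Reg  = g_aidxIemNativeCallRegs[2];
uint32_t const fCallerRegs = g_afIemNativeCallRegs[3];
#endif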
2850
2851#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2852/**
2853 * BP offset of the stack argument slots.
2854 *
2855 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2856 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2857 */
2858DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2859{
2860 IEMNATIVE_FP_OFF_STACK_ARG0,
2861# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2862 IEMNATIVE_FP_OFF_STACK_ARG1,
2863# endif
2864# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2865 IEMNATIVE_FP_OFF_STACK_ARG2,
2866# endif
2867# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2868 IEMNATIVE_FP_OFF_STACK_ARG3,
2869# endif
2870};
2871AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2872#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2873
2874/**
2875 * Info about shadowed guest register values.
2876 * @see IEMNATIVEGSTREG
2877 */
2878DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2879{
2880#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2881 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2882 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2883 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2884 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2885 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2886 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2887 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2888 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2889 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2890 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2891 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2892 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2893 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2894 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2895 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2896 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2897 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2898 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2899 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2900 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2901 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2902 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2903 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2904 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2905 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2906 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2907 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2908 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2909 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2910 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2911 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2912 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2913 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2914 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2915 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2916 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2917 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2918 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2919 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2920 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2921 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2922 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2923 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2924 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2925 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2926 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2927 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2928 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2929#undef CPUMCTX_OFF_AND_SIZE
2930};
2931AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2932
2933
2934/** Host CPU general purpose register names. */
2935DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2936{
2937#ifdef RT_ARCH_AMD64
2938 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2939#elif RT_ARCH_ARM64
2940 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2941 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2942#else
2943# error "port me"
2944#endif
2945};
2946
2947
2948#if 0 /* unused */
2949/**
2950 * Tries to locate a suitable register in the given register mask.
2951 *
2952 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2953 * failed.
2954 *
2955 * @returns Host register number on success, returns UINT8_MAX on failure.
2956 */
2957static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2958{
2959 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2960 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2961 if (fRegs)
2962 {
2963 /** @todo pick better here: */
2964 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2965
2966 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2967 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2968 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2969 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2970
2971 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2972 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2973 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2974 return idxReg;
2975 }
2976 return UINT8_MAX;
2977}
2978#endif /* unused */
2979
2980
2981#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2982/**
2983 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2984 *
2985 * @returns New code buffer offset on success, UINT32_MAX on failure.
2986 * @param pReNative The native recompile state.
2987 * @param off The current code buffer position.
2988 * @param enmGstReg The guest register to store to.
2989 * @param idxHstReg The host register to store from.
2990 */
2991DECL_FORCE_INLINE_THROW(uint32_t)
2992iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
2993{
2994 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2995 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
2996
2997 switch (g_aGstShadowInfo[enmGstReg].cb)
2998 {
2999 case sizeof(uint64_t):
3000 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3001 case sizeof(uint32_t):
3002 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3003 case sizeof(uint16_t):
3004 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3005#if 0 /* not present in the table. */
3006 case sizeof(uint8_t):
3007 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3008#endif
3009 default:
3010 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3011 }
3012}
3013
3014
3015/**
3016 * Emits code to flush a pending write of the given guest register if any.
3017 *
3018 * @returns New code buffer offset.
3019 * @param pReNative The native recompile state.
3020 * @param off Current code buffer position.
3021 * @param enmGstReg The guest register to flush.
3022 */
3023DECL_HIDDEN_THROW(uint32_t)
3024iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
3025{
3026 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3027
3028 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3029 && enmGstReg <= kIemNativeGstReg_GprLast)
3030 || enmGstReg == kIemNativeGstReg_MxCsr);
3031 Assert( idxHstReg != UINT8_MAX
3032 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3033 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3034 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3035
3036 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3037
3038 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3039 return off;
3040}
3041
3042
3043/**
3044 * Flush the given set of guest registers if marked as dirty.
3045 *
3046 * @returns New code buffer offset.
3047 * @param pReNative The native recompile state.
3048 * @param off Current code buffer position.
3049 * @param fFlushGstReg The guest register set to flush (default is flush everything).
3050 */
3051DECL_HIDDEN_THROW(uint32_t)
3052iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
3053{
3054 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
3055 if (bmGstRegShadowDirty)
3056 {
3057# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3058 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3059 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
3060# endif
3061 do
3062 {
3063 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3064 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3065 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3066 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3067 } while (bmGstRegShadowDirty);
3068 }
3069
3070 return off;
3071}
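
/*
 * Illustrative usage sketch (comment only, not compiled): a caller about to
 * emit something that reads guest state straight from CPUMCTX (a C helper
 * call, for instance) would typically flush the pending shadow writes first.
 * The surrounding emitter and the single-register mask are hypothetical; only
 * the helper named below is defined in this file.
 *
 *      off = iemNativeRegFlushDirtyGuest(pReNative, off);                       // flush everything dirty
 *      off = iemNativeRegFlushDirtyGuest(pReNative, off, RT_BIT_64(enmGstReg)); // or just one guest register
 */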
3072
3073
3074/**
3075 * Flush all shadowed guest registers marked as dirty for the given host register.
3076 *
3077 * @returns New code buffer offset.
3078 * @param pReNative The native recompile state.
3079 * @param off Current code buffer position.
3080 * @param idxHstReg The host register.
3081 *
3082 * @note This doesn't do any unshadowing of guest registers from the host register.
3083 */
3084DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3085{
3086 /* We need to flush any pending guest register writes this host register shadows. */
3087 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3088 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3089 {
3090# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3091 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3092 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3093# endif
3094 /** @todo r=bird: This is a crap way of enumerating a bitmask where we're
3095 * likely to only have a single bit set. It'll be in the 0..15 range,
3096 * but still it's 15 unnecessary loops for the last guest register. */
3097
3098 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3099 do
3100 {
3101 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3102 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3103 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3104 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3105 } while (bmGstRegShadowDirty);
3106 }
3107
3108 return off;
3109}
3110#endif
3111
3112
3113/**
3114 * Locate a register, possibly freeing one up.
3115 *
3116 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3117 * failed.
3118 *
3119 * @returns Host register number on success. Returns UINT8_MAX if no registers
3120 * found, the caller is supposed to deal with this and raise an
3121 * allocation type specific status code (if desired).
3122 *
3123 * @throws VBox status code if we run into trouble spilling a variable or
3124 * recording debug info. Does NOT throw anything if we're out of
3125 * registers, though.
3126 */
3127static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3128 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3129{
3130 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3131 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3132 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3133
3134 /*
3135 * Try a freed register that's shadowing a guest register.
3136 */
3137 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3138 if (fRegs)
3139 {
3140 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3141
3142#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3143 /*
3144 * When we have liveness information, we use it to kick out all shadowed
3145 * guest registers that will not be needed any more in this TB. If we're
3146 * lucky, this may prevent us from ending up here again.
3147 *
3148 * Note! We must consider the previous entry here so we don't free
3149 * anything that the current threaded function requires (current
3150 * entry is produced by the next threaded function).
3151 */
3152 uint32_t const idxCurCall = pReNative->idxCurCall;
3153 if (idxCurCall > 0)
3154 {
3155 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3156
3157# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3158 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3159 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3160 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3161#else
3162 /* Construct a mask of the registers not in the read or write state.
3163 Note! We could skip writes, if they aren't from us, as this is just
3164 a hack to prevent trashing registers that have just been written
3165 or will be written when we retire the current instruction. */
3166 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3167 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3168 & IEMLIVENESSBIT_MASK;
3169#endif
3170 /* Merge EFLAGS. */
3171 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3172 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3173 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3174 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3175 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
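/* Net effect of the folding above (illustrative note; this assumes the EFLAGS
   liveness entries occupy seven consecutive bit positions starting at
   kIemNativeGstReg_EFlags, as the line comments above imply): the bit at
   kIemNativeGstReg_EFlags survives only if all seven flag groups are freeable,
   and the individual flag-group bits are cleared, so EFLAGS is only ever
   freed as a whole. */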
3176
3177 /* If it matches any shadowed registers. */
3178 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3179 {
3180#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3181 /* Writeback any dirty shadow registers we are about to unshadow. */
3182 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3183#endif
3184
3185 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3186 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3187 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3188
3189 /* See if we've got any unshadowed registers we can return now. */
3190 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3191 if (fUnshadowedRegs)
3192 {
3193 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3194 return (fPreferVolatile
3195 ? ASMBitFirstSetU32(fUnshadowedRegs)
3196 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3197 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3198 - 1;
3199 }
3200 }
3201 }
3202#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3203
3204 unsigned const idxReg = (fPreferVolatile
3205 ? ASMBitFirstSetU32(fRegs)
3206 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3207 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3208 - 1;
3209
3210 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3211 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3212 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3213 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3214
3215#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3216 /* We need to flush any pending guest register writes this host register shadows. */
3217 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3218#endif
3219
3220 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3221 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3222 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3223 return idxReg;
3224 }
3225
3226 /*
3227 * Try free up a variable that's in a register.
3228 *
3229 * We do two rounds here, first evacuating variables that don't need to be
3230 * saved on the stack, then in the second round moving things to the stack.
3231 */
3232 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3233 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3234 {
3235 uint32_t fVars = pReNative->Core.bmVars;
3236 while (fVars)
3237 {
3238 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3239 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3240#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3241 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
3242 continue;
3243#endif
3244
3245 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3246 && (RT_BIT_32(idxReg) & fRegMask)
3247 && ( iLoop == 0
3248 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3249 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3250 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3251 {
3252 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3253 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3254 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3255 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3256 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3257 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3258#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3259 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3260#endif
3261
3262 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3263 {
3264 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3265 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3266 }
3267
3268 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3269 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3270
3271 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3272 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3273 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3274 return idxReg;
3275 }
3276 fVars &= ~RT_BIT_32(idxVar);
3277 }
3278 }
3279
3280 return UINT8_MAX;
3281}
3282
3283
3284/**
3285 * Reassigns a variable to a different register specified by the caller.
3286 *
3287 * @returns The new code buffer position.
3288 * @param pReNative The native recompile state.
3289 * @param off The current code buffer position.
3290 * @param idxVar The variable index.
3291 * @param idxRegOld The old host register number.
3292 * @param idxRegNew The new host register number.
3293 * @param pszCaller The caller for logging.
3294 */
3295static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3296 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3297{
3298 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3299 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3300#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3301 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3302#endif
3303 RT_NOREF(pszCaller);
3304
3305#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3306 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3307#endif
3308 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3309
3310 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3311#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3312 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3313#endif
3314 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3315 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3316 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3317
3318 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3319 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3320 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3321 if (fGstRegShadows)
3322 {
3323 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3324 | RT_BIT_32(idxRegNew);
3325 while (fGstRegShadows)
3326 {
3327 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3328 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3329
3330 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3331 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3332 }
3333 }
3334
3335 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3336 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3337 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3338 return off;
3339}
3340
3341
3342/**
3343 * Moves a variable to a different register or spills it onto the stack.
3344 *
3345 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3346 * kinds can easily be recreated if needed later.
3347 *
3348 * @returns The new code buffer position.
3349 * @param pReNative The native recompile state.
3350 * @param off The current code buffer position.
3351 * @param idxVar The variable index.
3352 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3353 * call-volatile registers.
3354 */
3355DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3356 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3357{
3358 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3359 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3360 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3361 Assert(!pVar->fRegAcquired);
3362
3363 uint8_t const idxRegOld = pVar->idxReg;
3364 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3365 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3366 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3367 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3368 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3369 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3370 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3371 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3372#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3373 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3374#endif
3375
3376
3377 /** @todo Add statistics on this.*/
3378 /** @todo Implement basic variable liveness analysis (python) so variables
3379 * can be freed immediately once no longer used. Without it we risk
3380 * trashing registers and stack for dead variables.
3381 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3382
3383 /*
3384 * First try move it to a different register, as that's cheaper.
3385 */
3386 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3387 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3388 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3389 if (fRegs)
3390 {
3391 /* Avoid using shadow registers, if possible. */
3392 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3393 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3394 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3395 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3396 }
3397
3398 /*
3399 * Otherwise we must spill the register onto the stack.
3400 */
3401 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3402 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3403 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3404 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3405
3406 pVar->idxReg = UINT8_MAX;
3407 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3408 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3409 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3410 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3411 return off;
3412}
3413
3414
3415/**
3416 * Allocates a temporary host general purpose register.
3417 *
3418 * This may emit code to save register content onto the stack in order to free
3419 * up a register.
3420 *
3421 * @returns The host register number; throws VBox status code on failure,
3422 * so no need to check the return value.
3423 * @param pReNative The native recompile state.
3424 * @param poff Pointer to the variable with the code buffer position.
3425 * This will be updated if we need to move a variable from
3426 * register to stack in order to satisfy the request.
3427 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3428 * registers (@c true, default) or the other way around
3429 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3430 */
3431DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3432{
3433 /*
3434 * Try find a completely unused register, preferably a call-volatile one.
3435 */
3436 uint8_t idxReg;
3437 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3438 & ~pReNative->Core.bmHstRegsWithGstShadow
3439 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3440 if (fRegs)
3441 {
3442 if (fPreferVolatile)
3443 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3444 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3445 else
3446 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3447 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3448 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3449 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3450 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3451 }
3452 else
3453 {
3454 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3455 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3456 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3457 }
3458 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3459}
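
/*
 * Illustrative usage sketch (comment only, not compiled): the usual pattern in
 * an emitter is to allocate the temporary, emit whatever needs it and free it
 * again.  The emitted operation and idxOtherReg are hypothetical; the helpers
 * are the ones defined in this file.
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxTmpReg, idxOtherReg);
 *      // ... more code using idxTmpReg ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */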
3460
3461
3462/**
3463 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3464 * registers.
3465 *
3466 * @returns The host register number; throws VBox status code on failure,
3467 * so no need to check the return value.
3468 * @param pReNative The native recompile state.
3469 * @param poff Pointer to the variable with the code buffer position.
3470 * This will be updated if we need to move a variable from
3471 * register to stack in order to satisfy the request.
3472 * @param fRegMask Mask of acceptable registers.
3473 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3474 * registers (@c true, default) or the other way around
3475 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3476 */
3477DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3478 bool fPreferVolatile /*= true*/)
3479{
3480 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3481 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3482
3483 /*
3484 * Try find a completely unused register, preferably a call-volatile one.
3485 */
3486 uint8_t idxReg;
3487 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3488 & ~pReNative->Core.bmHstRegsWithGstShadow
3489 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3490 & fRegMask;
3491 if (fRegs)
3492 {
3493 if (fPreferVolatile)
3494 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3495 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3496 else
3497 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3498 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3499 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3500 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3501 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3502 }
3503 else
3504 {
3505 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3506 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3507 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3508 }
3509 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3510}
3511
3512
3513/**
3514 * Allocates a temporary register for loading an immediate value into.
3515 *
3516 * This will emit code to load the immediate, unless there happens to be an
3517 * unused register with the value already loaded.
3518 *
3519 * The caller must not modify the returned register; it must be considered
3520 * read-only. Free using iemNativeRegFreeTmpImm.
3521 *
3522 * @returns The host register number; throws VBox status code on failure, so no
3523 * need to check the return value.
3524 * @param pReNative The native recompile state.
3525 * @param poff Pointer to the variable with the code buffer position.
3526 * @param uImm The immediate value that the register must hold upon
3527 * return.
3528 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3529 * registers (@c true, default) or the other way around
3530 * (@c false).
3531 *
3532 * @note Reusing immediate values has not been implemented yet.
3533 */
3534DECL_HIDDEN_THROW(uint8_t)
3535iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3536{
3537 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3538 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3539 return idxReg;
3540}
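
/*
 * Illustrative usage sketch (comment only, not compiled): the immediate
 * register is treated as read-only and released with the matching free
 * routine.  What is emitted against it here is hypothetical.
 *
 *      uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
 *      // ... emit a compare or similar that only reads idxImmReg ...
 *      iemNativeRegFreeTmpImm(pReNative, idxImmReg);
 */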
3541
3542
3543/**
3544 * Allocates a temporary host general purpose register for keeping a guest
3545 * register value.
3546 *
3547 * Since we may already have a register holding the guest register value,
3548 * code will be emitted to do the loading if that's not the case. Code may also
3549 * be emitted if we have to free up a register to satify the request.
3550 *
3551 * @returns The host register number; throws VBox status code on failure, so no
3552 * need to check the return value.
3553 * @param pReNative The native recompile state.
3554 * @param poff Pointer to the variable with the code buffer
3555 * position. This will be updated if we need to move a
3556 * variable from register to stack in order to satisfy
3557 * the request.
3558 * @param enmGstReg The guest register that is to be updated.
3559 * @param enmIntendedUse How the caller will be using the host register.
3560 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3561 * register is okay (default). The ASSUMPTION here is
3562 * that the caller has already flushed all volatile
3563 * registers, so this is only applied if we allocate a
3564 * new register.
3565 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3566 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3567 */
3568DECL_HIDDEN_THROW(uint8_t)
3569iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3570 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3571 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3572{
3573 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3574#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3575 AssertMsg( fSkipLivenessAssert
3576 || pReNative->idxCurCall == 0
3577 || enmGstReg == kIemNativeGstReg_Pc
3578 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3579 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3580 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3581 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3582 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3583 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3584#endif
3585 RT_NOREF(fSkipLivenessAssert);
3586#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3587 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3588#endif
3589 uint32_t const fRegMask = !fNoVolatileRegs
3590 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3591 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3592
3593 /*
3594 * First check if the guest register value is already in a host register.
3595 */
3596 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3597 {
3598 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3599 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3600 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3601 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3602
3603 /* It's not supposed to be allocated... */
3604 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3605 {
3606 /*
3607 * If the register will trash the guest shadow copy, try to find a
3608 * completely unused register we can use instead. If that fails,
3609 * we need to disassociate the host reg from the guest reg.
3610 */
3611 /** @todo would be nice to know if preserving the register is in any way helpful. */
3612 /* If the purpose is calculations, try to duplicate the register value as
3613 we'll be clobbering the shadow. */
3614 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3615 && ( ~pReNative->Core.bmHstRegs
3616 & ~pReNative->Core.bmHstRegsWithGstShadow
3617 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3618 {
3619 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3620
3621 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3622
3623 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3624 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3625 g_apszIemNativeHstRegNames[idxRegNew]));
3626 idxReg = idxRegNew;
3627 }
3628 /* If the current register matches the restrictions, go ahead and allocate
3629 it for the caller. */
3630 else if (fRegMask & RT_BIT_32(idxReg))
3631 {
3632 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3633 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3634 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3635 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3636 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3637 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3638 else
3639 {
3640 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3641 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3642 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3643 }
3644 }
3645 /* Otherwise, allocate a register that satisfies the caller and transfer
3646 the shadowing if compatible with the intended use. (This basically
3647 means the caller wants a non-volatile register (RSP push/pop scenario).)
3648 else
3649 {
3650 Assert(fNoVolatileRegs);
3651 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3652 !fNoVolatileRegs
3653 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3654 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3655 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3656 {
3657 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3658 Log12(("iemNativeRegAllocTmpForGuestReg: Transfering %s to %s for guest %s %s\n",
3659 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3660 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3661 }
3662 else
3663 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3664 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3665 g_apszIemNativeHstRegNames[idxRegNew]));
3666 idxReg = idxRegNew;
3667 }
3668 }
3669 else
3670 {
3671 /*
3672 * Oops. Shadowed guest register already allocated!
3673 *
3674 * Allocate a new register, copy the value and, if updating, the
3675 * guest shadow copy assignment to the new register.
3676 */
3677 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3678 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3679 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3680 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3681
3682 /** @todo share register for readonly access. */
3683 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3684 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3685
3686 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3687 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3688
3689 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3690 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3691 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3692 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3693 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3694 else
3695 {
3696 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3697 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3698 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3699 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3700 }
3701 idxReg = idxRegNew;
3702 }
3703 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3704
3705#ifdef VBOX_STRICT
3706 /* Strict builds: Check that the value is correct. */
3707 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3708#endif
3709
3710#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3711 /** @todo r=aeichner Implement for registers other than GPR as well. */
3712 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3713 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3714 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3715 && enmGstReg <= kIemNativeGstReg_GprLast)
3716 || enmGstReg == kIemNativeGstReg_MxCsr))
3717 {
3718# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3719 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3720 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3721# endif
3722 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3723 }
3724#endif
3725
3726 return idxReg;
3727 }
3728
3729 /*
3730 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3731 */
3732 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3733
3734 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3735 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3736
3737 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3738 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3739 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3740 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3741
3742#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3743 /** @todo r=aeichner Implement for registers other than GPR as well. */
3744 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3745 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3746 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3747 && enmGstReg <= kIemNativeGstReg_GprLast)
3748 || enmGstReg == kIemNativeGstReg_MxCsr))
3749 {
3750# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3751 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3752 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3753# endif
3754 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3755 }
3756#endif
3757
3758 return idxRegNew;
3759}
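
/*
 * Illustrative usage sketch (comment only, not compiled): fetching a guest GPR
 * for updating; with IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK the store back
 * to CPUMCTX is left to the dirty-shadow flushing done elsewhere.  The guest
 * register chosen and the arithmetic emitted are hypothetical.
 *
 *      uint8_t const idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_GprFirst,
 *                                                             kIemNativeGstRegUse_ForUpdate);
 *      // ... emit the arithmetic modifying idxReg ...
 *      iemNativeRegFreeTmp(pReNative, idxReg);
 */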
3760
3761
3762/**
3763 * Allocates a temporary host general purpose register that already holds the
3764 * given guest register value.
3765 *
3766 * The use case for this function is places where the shadowing state cannot be
3767 * modified due to branching and such. This will fail if we don't have a
3768 * current shadow copy handy or if it's incompatible. The only code that will
3769 * be emitted here is value checking code in strict builds.
3770 *
3771 * The intended use can only be readonly!
3772 *
3773 * @returns The host register number, UINT8_MAX if not present.
3774 * @param pReNative The native recompile state.
3775 * @param poff Pointer to the instruction buffer offset.
3776 * Will be updated in strict builds if a register is
3777 * found.
3778 * @param enmGstReg The guest register that is to be read.
3779 * @note In strict builds, this may throw instruction buffer growth failures.
3780 * Non-strict builds will not throw anything.
3781 * @sa iemNativeRegAllocTmpForGuestReg
3782 */
3783DECL_HIDDEN_THROW(uint8_t)
3784iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3785{
3786 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3787#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3788 AssertMsg( pReNative->idxCurCall == 0
3789 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3790 || enmGstReg == kIemNativeGstReg_Pc,
3791 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3792#endif
3793
3794 /*
3795 * First check if the guest register value is already in a host register.
3796 */
3797 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3798 {
3799 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3800 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3801 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3802 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3803
3804 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3805 {
3806 /*
3807 * We only do readonly use here, so easy compared to the other
3808 * variant of this code.
3809 */
3810 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3811 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3812 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3813 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3814 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3815
3816#ifdef VBOX_STRICT
3817 /* Strict builds: Check that the value is correct. */
3818 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3819#else
3820 RT_NOREF(poff);
3821#endif
3822 return idxReg;
3823 }
3824 }
3825
3826 return UINT8_MAX;
3827}
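
/*
 * Illustrative usage sketch (comment only, not compiled): since this variant
 * never emits a load, the caller must be prepared for UINT8_MAX and fall back
 * to something else at that point (the fallback here is hypothetical).
 *
 *      uint8_t const idxReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *      if (idxReg != UINT8_MAX)
 *      {
 *          // ... read-only use of idxReg, then iemNativeRegFreeTmp(pReNative, idxReg) ...
 *      }
 *      else
 *      {
 *          // ... load the value from CPUMCTX some other way ...
 *      }
 */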
3828
3829
3830/**
3831 * Allocates argument registers for a function call.
3832 *
3833 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3834 * need to check the return value.
3835 * @param pReNative The native recompile state.
3836 * @param off The current code buffer offset.
3837 * @param cArgs The number of arguments the function call takes.
3838 */
3839DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3840{
3841 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3842 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3843 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3844 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3845
3846 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3847 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3848 else if (cArgs == 0)
3849 return off;
3850
3851 /*
3852 * Do we get luck and all register are free and not shadowing anything?
3853 */
3854 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3855 for (uint32_t i = 0; i < cArgs; i++)
3856 {
3857 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3858 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3859 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3860 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3861 }
3862 /*
3863 * Okay, not lucky so we have to free up the registers.
3864 */
3865 else
3866 for (uint32_t i = 0; i < cArgs; i++)
3867 {
3868 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3869 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3870 {
3871 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3872 {
3873 case kIemNativeWhat_Var:
3874 {
3875 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3876 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3877 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
3878 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3879 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
3880#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3881 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3882#endif
3883
3884 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
3885 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3886 else
3887 {
3888 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3889 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3890 }
3891 break;
3892 }
3893
3894 case kIemNativeWhat_Tmp:
3895 case kIemNativeWhat_Arg:
3896 case kIemNativeWhat_rc:
3897 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3898 default:
3899 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3900 }
3901
3902 }
3903 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3904 {
3905 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3906 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3907 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3908#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3909 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3910#endif
3911 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3912 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3913 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3914 }
3915 else
3916 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3917 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3918 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3919 }
3920 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3921 return off;
3922}
3923
3924
3925DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3926
3927
3928#if 0
3929/**
3930 * Frees a register assignment of any type.
3931 *
3932 * @param pReNative The native recompile state.
3933 * @param idxHstReg The register to free.
3934 *
3935 * @note Does not update variables.
3936 */
3937DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3938{
3939 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3940 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3941 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3942 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3943 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3944 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3945 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3946 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3947 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3948 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3949 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3950 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3951 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3952 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3953
3954 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3955 /* no flushing, right:
3956 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3957 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3958 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3959 */
3960}
3961#endif
3962
3963
3964/**
3965 * Frees a temporary register.
3966 *
3967 * Any shadow copies of guest registers assigned to the host register will not
3968 * be flushed by this operation.
3969 */
3970DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3971{
3972 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3973 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3974 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3975 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3976 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3977}
3978
3979
3980/**
3981 * Frees a temporary immediate register.
3982 *
3983 * It is assumed that the caller has not modified the register, so it still holds
3984 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3985 */
3986DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3987{
3988 iemNativeRegFreeTmp(pReNative, idxHstReg);
3989}
3990
3991
3992/**
3993 * Frees a register assigned to a variable.
3994 *
3995 * The register will be disassociated from the variable.
3996 */
3997DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3998{
3999 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4000 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4001 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4002 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4003 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4004#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4005 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4006#endif
4007
4008 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4009 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4010 if (!fFlushShadows)
4011 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4012 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4013 else
4014 {
4015 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4016 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4017#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4018 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
4019#endif
4020 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4021 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4022 uint64_t fGstRegShadows = fGstRegShadowsOld;
4023 while (fGstRegShadows)
4024 {
4025 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4026 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4027
4028 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4029 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4030 }
4031 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4032 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4033 }
4034}
4035
4036
4037#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4038# if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
4039/** Host CPU SIMD register names. */
4040DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4041{
4042# ifdef RT_ARCH_AMD64
4043 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4044# elif RT_ARCH_ARM64
4045 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4046 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4047# else
4048# error "port me"
4049# endif
4050};
4051# endif
4052
4053
4054/**
4055 * Frees a SIMD register assigned to a variable.
4056 *
4057 * The register will be disassociated from the variable.
4058 */
4059DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4060{
4061 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4062 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4063 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4064 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4065 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4066 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4067
4068 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4069 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4070 if (!fFlushShadows)
4071 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4072 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4073 else
4074 {
4075 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4076 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4077 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4078 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4079 uint64_t fGstRegShadows = fGstRegShadowsOld;
4080 while (fGstRegShadows)
4081 {
4082 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4083 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4084
4085 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4086 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4087 }
4088 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4089 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4090 }
4091}
4092
4093
4094/**
4095 * Reassigns a variable to a different SIMD register specified by the caller.
4096 *
4097 * @returns The new code buffer position.
4098 * @param pReNative The native recompile state.
4099 * @param off The current code buffer position.
4100 * @param idxVar The variable index.
4101 * @param idxRegOld The old host register number.
4102 * @param idxRegNew The new host register number.
4103 * @param pszCaller The caller for logging.
4104 */
4105static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4106 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4107{
4108 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4109 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4110 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4111 RT_NOREF(pszCaller);
4112
4113 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4114 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4115 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4116
4117 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4118 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4119 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4120
4121 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4122 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4124
4125 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4126 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4127 else
4128 {
4129 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4130 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4131 }
4132
4133 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4134 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4135 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
4136 if (fGstRegShadows)
4137 {
4138 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4139 | RT_BIT_32(idxRegNew);
4140 while (fGstRegShadows)
4141 {
4142 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4143 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4144
4145 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4146 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4147 }
4148 }
4149
4150 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4151 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4152 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4153 return off;
4154}
4155
4156
4157/**
4158 * Moves a variable to a different register or spills it onto the stack.
4159 *
4160 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4161 * kinds can easily be recreated if needed later.
4162 *
4163 * @returns The new code buffer position.
4164 * @param pReNative The native recompile state.
4165 * @param off The current code buffer position.
4166 * @param idxVar The variable index.
4167 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4168 * call-volatile registers.
4169 */
4170DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4171 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4172{
4173 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4174 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4175 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4176 Assert(!pVar->fRegAcquired);
4177 Assert(!pVar->fSimdReg);
4178
4179 uint8_t const idxRegOld = pVar->idxReg;
4180 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4181 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4182 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4183 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4184 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4185 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4186 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4187 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4188 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4189 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4190
4191 /** @todo Add statistics on this.*/
4192 /** @todo Implement basic variable liveness analysis (python) so variables
4193 * can be freed immediately once no longer used. Without it we risk
4194 * trashing registers and stack for dead variables.
4195 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4196
4197 /*
4198 * First try move it to a different register, as that's cheaper.
4199 */
4200 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4201 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4202 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4203 if (fRegs)
4204 {
4205 /* Avoid using shadow registers, if possible. */
4206 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4207 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4208 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4209 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4210 }
4211
4212 /*
4213 * Otherwise we must spill the register onto the stack.
4214 */
4215 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4216 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4217 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4218
4219 if (pVar->cbVar == sizeof(RTUINT128U))
4220 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4221 else
4222 {
4223 Assert(pVar->cbVar == sizeof(RTUINT256U));
4224 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4225 }
4226
4227 pVar->idxReg = UINT8_MAX;
4228 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4229 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4230 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4231 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4232 return off;
4233}
4234
4235
4236/**
4237 * Called right before emitting a call instruction to move anything important
4238 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
4239 * optionally freeing argument variables.
4240 *
4241 * @returns New code buffer offset, UINT32_MAX on failure.
4242 * @param pReNative The native recompile state.
4243 * @param off The code buffer offset.
4244 * @param cArgs The number of arguments the function call takes.
4245 * It is presumed that the host register part of these has
4246 * been allocated as such already and won't need moving,
4247 * just freeing.
4248 * @param fKeepVars Mask of variables that should keep their register
4249 * assignments. Caller must take care to handle these.
4250 */
4251DECL_HIDDEN_THROW(uint32_t)
4252iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4253{
4254 Assert(!cArgs); RT_NOREF(cArgs);
4255
4256 /* fKeepVars will reduce this mask. */
4257 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4258
4259 /*
4260 * Move anything important out of volatile registers.
4261 */
4262 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4263#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4264 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4265#endif
4266 ;
4267
4268 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4269 if (!fSimdRegsToMove)
4270 { /* likely */ }
4271 else
4272 {
4273 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4274 while (fSimdRegsToMove != 0)
4275 {
4276 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4277 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4278
4279 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4280 {
4281 case kIemNativeWhat_Var:
4282 {
4283 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4284 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4285 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4286 Assert(pVar->idxReg == idxSimdReg);
4287 Assert(pVar->fSimdReg);
4288 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4289 {
4290 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4291 idxVar, pVar->enmKind, pVar->idxReg));
4292 if (pVar->enmKind != kIemNativeVarKind_Stack)
4293 pVar->idxReg = UINT8_MAX;
4294 else
4295 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4296 }
4297 else
4298 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4299 continue;
4300 }
4301
4302 case kIemNativeWhat_Arg:
4303 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4304 continue;
4305
4306 case kIemNativeWhat_rc:
4307 case kIemNativeWhat_Tmp:
4308 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4309 continue;
4310
4311 case kIemNativeWhat_FixedReserved:
4312#ifdef RT_ARCH_ARM64
4313 continue; /* On ARM the upper half of the virtual 256-bit register. */
4314#endif
4315
4316 case kIemNativeWhat_FixedTmp:
4317 case kIemNativeWhat_pVCpuFixed:
4318 case kIemNativeWhat_pCtxFixed:
4319 case kIemNativeWhat_PcShadow:
4320 case kIemNativeWhat_Invalid:
4321 case kIemNativeWhat_End:
4322 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4323 }
4324 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4325 }
4326 }
4327
4328 /*
4329 * Do the actual freeing.
4330 */
4331 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4332 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4333 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4334 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4335
4336 /* If there are guest register shadows in any call-volatile register, we
4337 have to clear the corresponding guest register masks for each register. */
4338 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4339 if (fHstSimdRegsWithGstShadow)
4340 {
4341 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4342 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4343 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4344 do
4345 {
4346 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4347 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4348
4349 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4350
4351#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4352 /*
4353 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4354 * to call volatile registers).
4355 */
4356 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4357 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4358 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4359#endif
4360 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4361 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4362
4363 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4364 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4365 } while (fHstSimdRegsWithGstShadow != 0);
4366 }
4367
4368 return off;
4369}
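
/*
 * Illustrative usage sketch (not taken from the original code): since SIMD argument
 * registers aren't supported yet (hence the Assert(!cArgs) above), this is normally
 * invoked with zero arguments from the GPR variant below, roughly as
 *
 *      off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0, fKeepVars);
 *
 * after which every call-volatile SIMD register is free and any dirty guest SIMD
 * shadows it held have been written back.
 */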
4370#endif
4371
4372
4373/**
4374 * Called right before emitting a call instruction to move anything important
4375 * out of call-volatile registers, free and flush the call-volatile registers,
4376 * optionally freeing argument variables.
4377 *
4378 * @returns New code buffer offset, UINT32_MAX on failure.
4379 * @param pReNative The native recompile state.
4380 * @param off The code buffer offset.
4381 * @param cArgs The number of arguments the function call takes.
4382 * It is presumed that the host register part of these has
4383 * been allocated as such already and won't need moving,
4384 * just freeing.
4385 * @param fKeepVars Mask of variables that should keep their register
4386 * assignments. Caller must take care to handle these.
4387 */
4388DECL_HIDDEN_THROW(uint32_t)
4389iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4390{
4391 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4392
4393 /* fKeepVars will reduce this mask. */
4394 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4395
4396 /*
4397 * Move anything important out of volatile registers.
4398 */
4399 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4400 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4401 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4402#ifdef IEMNATIVE_REG_FIXED_TMP0
4403 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4404#endif
4405#ifdef IEMNATIVE_REG_FIXED_TMP1
4406 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4407#endif
4408#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4409 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4410#endif
4411 & ~g_afIemNativeCallRegs[cArgs];
4412
4413 fRegsToMove &= pReNative->Core.bmHstRegs;
4414 if (!fRegsToMove)
4415 { /* likely */ }
4416 else
4417 {
4418 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4419 while (fRegsToMove != 0)
4420 {
4421 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4422 fRegsToMove &= ~RT_BIT_32(idxReg);
4423
4424 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4425 {
4426 case kIemNativeWhat_Var:
4427 {
4428 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4429 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4430 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4431 Assert(pVar->idxReg == idxReg);
4432#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4433 Assert(!pVar->fSimdReg);
4434#endif
4435 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4436 {
4437 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4438 idxVar, pVar->enmKind, pVar->idxReg));
4439 if (pVar->enmKind != kIemNativeVarKind_Stack)
4440 pVar->idxReg = UINT8_MAX;
4441 else
4442 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4443 }
4444 else
4445 fRegsToFree &= ~RT_BIT_32(idxReg);
4446 continue;
4447 }
4448
4449 case kIemNativeWhat_Arg:
4450 AssertMsgFailed(("What?!?: %u\n", idxReg));
4451 continue;
4452
4453 case kIemNativeWhat_rc:
4454 case kIemNativeWhat_Tmp:
4455 AssertMsgFailed(("Missing free: %u\n", idxReg));
4456 continue;
4457
4458 case kIemNativeWhat_FixedTmp:
4459 case kIemNativeWhat_pVCpuFixed:
4460 case kIemNativeWhat_pCtxFixed:
4461 case kIemNativeWhat_PcShadow:
4462 case kIemNativeWhat_FixedReserved:
4463 case kIemNativeWhat_Invalid:
4464 case kIemNativeWhat_End:
4465 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4466 }
4467 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4468 }
4469 }
4470
4471 /*
4472 * Do the actual freeing.
4473 */
4474 if (pReNative->Core.bmHstRegs & fRegsToFree)
4475 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4476 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4477 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4478
4479 /* If there are guest register shadows in any call-volatile register, we
4480 have to clear the corresponding guest register masks for each register. */
4481 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4482 if (fHstRegsWithGstShadow)
4483 {
4484 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4485 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4486 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4487 do
4488 {
4489 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4490 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4491
4492 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4493
4494#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4495 /*
4496 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4497 * to call volatile registers).
4498 */
4499 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4500 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4501 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4502#endif
4503
4504 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4505 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4506 } while (fHstRegsWithGstShadow != 0);
4507 }
4508
4509#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4510 /* Now for the SIMD registers, no argument support for now. */
4511 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4512#endif
4513
4514 return off;
4515}
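
/*
 * Illustrative usage sketch (the argument count is just an example): a caller about
 * to emit a helper call would do something along the lines of
 *
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2, 0);
 *      // ... load the argument registers and emit the actual call ...
 *
 * where 2 is the argument count and 0 means no variables keep their register
 * assignments.  On return every call-volatile GPR outside the argument set is free,
 * variables previously living there have been moved or spilled, dirty guest shadows
 * have been written back, and the SIMD side is handled by the call at the end above.
 */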
4516
4517
4518/**
4519 * Flushes a set of guest register shadow copies.
4520 *
4521 * This is usually done after calling a threaded function or a C-implementation
4522 * of an instruction.
4523 *
4524 * @param pReNative The native recompile state.
4525 * @param fGstRegs Set of guest registers to flush.
4526 */
4527DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4528{
4529 /*
4530 * Reduce the mask by what's currently shadowed
4531 */
4532 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4533 fGstRegs &= bmGstRegShadowsOld;
4534 if (fGstRegs)
4535 {
4536 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4537 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4538 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4539 if (bmGstRegShadowsNew)
4540 {
4541 /*
4542 * Partial.
4543 */
4544 do
4545 {
4546 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4547 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4548 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4549 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4550 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4551#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4552 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4553#endif
4554
4555 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4556 fGstRegs &= ~fInThisHstReg;
4557 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4558 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4559 if (!fGstRegShadowsNew)
4560 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4561 } while (fGstRegs != 0);
4562 }
4563 else
4564 {
4565 /*
4566 * Clear all.
4567 */
4568 do
4569 {
4570 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4571 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4572 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4573 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4574 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4575#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4576 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4577#endif
4578
4579 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4580 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4581 } while (fGstRegs != 0);
4582 pReNative->Core.bmHstRegsWithGstShadow = 0;
4583 }
4584 }
4585}
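
/*
 * Illustrative usage sketch: after emitting a call to a threaded function or a
 * C-implementation that may change guest registers, the stale shadow copies are
 * dropped with something like
 *
 *      iemNativeRegFlushGuestShadows(pReNative, fGstRegsClobbered);
 *
 * where fGstRegsClobbered is a hypothetical RT_BIT_64() mask of the guest registers
 * the callee may have modified; UINT64_MAX flushes every shadow currently tracked.
 */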
4586
4587
4588/**
4589 * Flushes guest register shadow copies held by a set of host registers.
4590 *
4591 * This is used with the TLB lookup code for ensuring that we don't carry on
4592 * with any guest shadows in volatile registers, as these will get corrupted by
4593 * a TLB miss.
4594 *
4595 * @param pReNative The native recompile state.
4596 * @param fHstRegs Set of host registers to flush guest shadows for.
4597 */
4598DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4599{
4600 /*
4601 * Reduce the mask by what's currently shadowed.
4602 */
4603 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4604 fHstRegs &= bmHstRegsWithGstShadowOld;
4605 if (fHstRegs)
4606 {
4607 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4608 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4609 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4610 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4611 if (bmHstRegsWithGstShadowNew)
4612 {
4613 /*
4614 * Partial (likely).
4615 */
4616 uint64_t fGstShadows = 0;
4617 do
4618 {
4619 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4620 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4621 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4622 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4623#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4624 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4625#endif
4626
4627 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4628 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4629 fHstRegs &= ~RT_BIT_32(idxHstReg);
4630 } while (fHstRegs != 0);
4631 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4632 }
4633 else
4634 {
4635 /*
4636 * Clear all.
4637 */
4638 do
4639 {
4640 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4641 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4642 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4643 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4644#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4645 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4646#endif
4647
4648 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4649 fHstRegs &= ~RT_BIT_32(idxHstReg);
4650 } while (fHstRegs != 0);
4651 pReNative->Core.bmGstRegShadows = 0;
4652 }
4653 }
4654}
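
/*
 * Illustrative usage sketch: the TLB lookup code hands this the volatile register
 * set before branching off to the miss path, e.g.
 *
 *      iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 *
 * Only the shadowing bookkeeping is dropped here; dirty shadows must already have
 * been written back, as the asserts above insist.
 */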
4655
4656
4657/**
4658 * Restores guest shadow copies in volatile registers.
4659 *
4660 * This is used after calling a helper function (think TLB miss) to restore the
4661 * register state of volatile registers.
4662 *
4663 * @param pReNative The native recompile state.
4664 * @param off The code buffer offset.
4665 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4666 * be active (allocated) w/o asserting. Hack.
4667 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4668 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4669 */
4670DECL_HIDDEN_THROW(uint32_t)
4671iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4672{
4673 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4674 if (fHstRegs)
4675 {
4676 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4677 do
4678 {
4679 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4680
4681 /* It's not fatal if a register is active holding a variable that
4682 shadows a guest register, ASSUMING all pending guest register
4683 writes were flushed prior to the helper call. However, we'll be
4684 emitting duplicate restores, so it wastes code space. */
4685 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4686 RT_NOREF(fHstRegsActiveShadows);
4687
4688 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4689#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4690 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4691#endif
4692 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4693 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4694 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4695
4696 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4697 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4698
4699 fHstRegs &= ~RT_BIT_32(idxHstReg);
4700 } while (fHstRegs != 0);
4701 }
4702 return off;
4703}
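
/*
 * Illustrative usage sketch: on a TLB miss path the helper call clobbers the
 * volatile registers, so once it returns the shadow copies are reloaded from the
 * guest context:
 *
 *      // ... emit the call to the TLB miss helper ...
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
 *
 * A non-zero fHstRegsActiveShadows mask tells the assertion above about host
 * registers the caller deliberately keeps allocated.
 */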
4704
4705
4706
4707
4708/*********************************************************************************************************************************
4709* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4710*********************************************************************************************************************************/
4711#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4712
4713/**
4714 * Info about shadowed guest SIMD register values.
4715 * @see IEMNATIVEGSTSIMDREG
4716 */
4717static struct
4718{
4719 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4720 uint32_t offXmm;
4721 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4722 uint32_t offYmm;
4723 /** Name (for logging). */
4724 const char *pszName;
4725} const g_aGstSimdShadowInfo[] =
4726{
4727#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4728 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4729 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4730 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4731 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4732 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4733 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4734 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4735 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4736 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4737 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4738 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4739 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4740 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4741 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4742 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4743 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4744 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4745#undef CPUMCTX_OFF_AND_SIZE
4746};
4747AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
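
/*
 * Illustrative usage sketch: the table above is what maps a guest SIMD register to
 * its VMCPU offsets for the load/store emitters, e.g. when storing the low 128 bits
 * of a shadow back into the guest context:
 *
 *      off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg,
 *                                                      g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
 *
 * This is exactly how iemNativeSimdRegFlushPendingWrite() below uses it.
 */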
4748
4749
4750/**
4751 * Frees a temporary SIMD register.
4752 *
4753 * Any shadow copies of guest registers assigned to the host register will not
4754 * be flushed by this operation.
4755 */
4756DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4757{
4758 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4759 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4760 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4761 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4762 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4763}
4764
4765
4766/**
4767 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
4768 *
4769 * @returns New code buffer offset.
4770 * @param pReNative The native recompile state.
4771 * @param off Current code buffer position.
4772 * @param enmGstSimdReg The guest SIMD register to flush.
4773 */
4774DECL_HIDDEN_THROW(uint32_t)
4775iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4776{
4777 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4778
4779 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4780 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4781 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4782 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4783
4784 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4785 {
4786 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4787 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4788 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4789 }
4790
4791 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4792 {
4793 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4794 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4795 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4796 }
4797
4798 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4799 return off;
4800}
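
/*
 * Illustrative usage sketch: with delayed register writeback a dirty shadow of, say,
 * the first guest SIMD register is written back before its host register is
 * repurposed:
 *
 *      off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0));
 *
 * Depending on the dirty state this emits a low 128-bit store, a high 128-bit store,
 * or both, and then clears the dirty bits for that guest register.
 */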
4801
4802
4803/**
4804 * Flush the given set of guest SIMD registers if marked as dirty.
4805 *
4806 * @returns New code buffer offset.
4807 * @param pReNative The native recompile state.
4808 * @param off Current code buffer position.
4809 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
4810 */
4811DECL_HIDDEN_THROW(uint32_t)
4812iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4813{
4814 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4815 & fFlushGstSimdReg;
4816 if (bmGstSimdRegShadowDirty)
4817 {
4818# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4819 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4820 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4821# endif
4822
4823 do
4824 {
4825 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4826 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4827 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4828 } while (bmGstSimdRegShadowDirty);
4829 }
4830
4831 return off;
4832}
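
/*
 * Illustrative usage sketch: everything dirty can be written back in one go, or the
 * mask can be narrowed to a single register:
 *
 *      off = iemNativeSimdRegFlushDirtyGuest(pReNative, off);
 *      off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, RT_BIT_64(kIemNativeGstSimdReg_SimdRegFirst));
 *
 * The first form relies on the UINT64_MAX default and flushes all dirty guest SIMD
 * shadows; the second only flushes the first guest SIMD register.
 */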
4833
4834
4835#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4836/**
4837 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
4838 *
4839 * @returns New code buffer offset.
4840 * @param pReNative The native recompile state.
4841 * @param off Current code buffer position.
4842 * @param idxHstSimdReg The host SIMD register.
4843 *
4844 * @note This doesn't do any unshadowing of guest registers from the host register.
4845 */
4846DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
4847{
4848 /* We need to flush any pending guest register writes this host register shadows. */
4849 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4850 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
4851 if (bmGstSimdRegShadowDirty)
4852 {
4853# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4854 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4855 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4856# endif
4857
4858 do
4859 {
4860 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4861 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4862 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4863 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
4864 } while (bmGstSimdRegShadowDirty);
4865 }
4866
4867 return off;
4868}
4869#endif
4870
4871
4872/**
4873 * Locate a register, possibly freeing one up.
4874 *
4875 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4876 * failed.
4877 *
4878 * @returns Host register number on success. Returns UINT8_MAX if no registers
4879 * were found; the caller is supposed to deal with this and raise an
4880 * allocation type specific status code (if desired).
4881 *
4882 * @throws VBox status code if we run into trouble spilling a variable or
4883 * recording debug info. Does NOT throw anything if we're out of
4884 * registers, though.
4885 */
4886static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4887 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4888{
4889 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
4890 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4891 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4892
4893 /*
4894 * Try a freed register that's shadowing a guest register.
4895 */
4896 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4897 if (fRegs)
4898 {
4899 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
4900
4901#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4902 /*
4903 * When we have liveness information, we use it to kick out all shadowed
4904 * guest registers that will not be needed any more in this TB. If we're
4905 * lucky, this may prevent us from ending up here again.
4906 *
4907 * Note! We must consider the previous entry here so we don't free
4908 * anything that the current threaded function requires (current
4909 * entry is produced by the next threaded function).
4910 */
4911 uint32_t const idxCurCall = pReNative->idxCurCall;
4912 if (idxCurCall > 0)
4913 {
4914 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4915
4916# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4917 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4918 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4919 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
4920#else
4921 /* Construct a mask of the registers not in the read or write state.
4922 Note! We could skip writes, if they aren't from us, as this is just
4923 a hack to prevent trashing registers that have just been written
4924 or will be written when we retire the current instruction. */
4925 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4926 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4927 & IEMLIVENESSBIT_MASK;
4928#endif
4929 /* If it matches any shadowed registers. */
4930 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4931 {
4932 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
4933 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4934 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4935
4936 /* See if we've got any unshadowed registers we can return now. */
4937 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4938 if (fUnshadowedRegs)
4939 {
4940 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
4941 return (fPreferVolatile
4942 ? ASMBitFirstSetU32(fUnshadowedRegs)
4943 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4944 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4945 - 1;
4946 }
4947 }
4948 }
4949#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4950
4951 unsigned const idxReg = (fPreferVolatile
4952 ? ASMBitFirstSetU32(fRegs)
4953 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4954 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
4955 - 1;
4956
4957 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
4958 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
4959 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4960 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
4961
4962 /* We need to flush any pending guest register writes this host SIMD register shadows. */
4963 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
4964
4965 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4966 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4967 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4968 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4969 return idxReg;
4970 }
4971
4972 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
4973
4974 /*
4975 * Try to free up a variable that's in a register.
4976 *
4977 * We do two rounds here: first we evacuate variables that don't need to be
4978 * saved on the stack, then in the second round we move things to the stack.
4979 */
4980 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
4981 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4982 {
4983 uint32_t fVars = pReNative->Core.bmVars;
4984 while (fVars)
4985 {
4986 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4987 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4988 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non-SIMD variables here, but clear the bit so we don't loop forever. */
4989 { fVars &= ~RT_BIT_32(idxVar); continue; }
4990
4991 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
4992 && (RT_BIT_32(idxReg) & fRegMask)
4993 && ( iLoop == 0
4994 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4995 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4996 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4997 {
4998 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
4999 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5000 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5001 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5002 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5003 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5004
5005 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5006 {
5007 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5008 *poff = pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U)
 ? iemNativeEmitStoreVecRegByBpU128(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg)
 : iemNativeEmitStoreVecRegByBpU256(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5009 }
5010
5011 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5012 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5013
5014 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5015 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5016 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5017 return idxReg;
5018 }
5019 fVars &= ~RT_BIT_32(idxVar);
5020 }
5021 }
5022
5023 AssertFailed();
5024 return UINT8_MAX;
5025}
5026
5027
5028/**
5029 * Flushes a set of guest register shadow copies.
5030 *
5031 * This is usually done after calling a threaded function or a C-implementation
5032 * of an instruction.
5033 *
5034 * @param pReNative The native recompile state.
5035 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5036 */
5037DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5038{
5039 /*
5040 * Reduce the mask by what's currently shadowed
5041 */
5042 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5043 fGstSimdRegs &= bmGstSimdRegShadows;
5044 if (fGstSimdRegs)
5045 {
5046 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5047 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5048 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5049 if (bmGstSimdRegShadowsNew)
5050 {
5051 /*
5052 * Partial.
5053 */
5054 do
5055 {
5056 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5057 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5058 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5059 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5060 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5061 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5062
5063 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5064 fGstSimdRegs &= ~fInThisHstReg;
5065 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5066 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5067 if (!fGstRegShadowsNew)
5068 {
5069 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5070 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5071 }
5072 } while (fGstSimdRegs != 0);
5073 }
5074 else
5075 {
5076 /*
5077 * Clear all.
5078 */
5079 do
5080 {
5081 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5082 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5083 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5084 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5085 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5086 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5087
5088 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5089 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5090 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5091 } while (fGstSimdRegs != 0);
5092 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5093 }
5094 }
5095}
5096
5097
5098/**
5099 * Allocates a temporary host SIMD register.
5100 *
5101 * This may emit code to save register content onto the stack in order to free
5102 * up a register.
5103 *
5104 * @returns The host register number; throws VBox status code on failure,
5105 * so no need to check the return value.
5106 * @param pReNative The native recompile state.
5107 * @param poff Pointer to the variable with the code buffer position.
5108 * This will be updated if we need to move a variable from
5109 * register to stack in order to satisfy the request.
5110 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5111 * registers (@c true, default) or the other way around
5112 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5113 */
5114DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5115{
5116 /*
5117 * Try to find a completely unused register, preferably a call-volatile one.
5118 */
5119 uint8_t idxSimdReg;
5120 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5121 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5122 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5123 if (fRegs)
5124 {
5125 if (fPreferVolatile)
5126 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5127 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5128 else
5129 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5130 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5131 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5132 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5133
5134 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5135 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5136 }
5137 else
5138 {
5139 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5140 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5141 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5142 }
5143
5144 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5145 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5146}
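
/*
 * Illustrative usage sketch: temporary SIMD registers are allocated and released
 * around the code that needs them:
 *
 *      uint8_t const idxSimdRegTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      // ... emit SIMD code using idxSimdRegTmp ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdRegTmp);
 *
 * iemNativeSimdRegAllocTmpEx() below does the same but restricts the candidates to a
 * caller supplied register mask.
 */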
5147
5148
5149/**
5150 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5151 * registers.
5152 *
5153 * @returns The host register number; throws VBox status code on failure,
5154 * so no need to check the return value.
5155 * @param pReNative The native recompile state.
5156 * @param poff Pointer to the variable with the code buffer position.
5157 * This will be updated if we need to move a variable from
5158 * register to stack in order to satisfy the request.
5159 * @param fRegMask Mask of acceptable registers.
5160 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5161 * registers (@c true, default) or the other way around
5162 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5163 */
5164DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5165 bool fPreferVolatile /*= true*/)
5166{
5167 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5168 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5169
5170 /*
5171 * Try to find a completely unused register, preferably a call-volatile one.
5172 */
5173 uint8_t idxSimdReg;
5174 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5175 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5176 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5177 & fRegMask;
5178 if (fRegs)
5179 {
5180 if (fPreferVolatile)
5181 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5182 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5183 else
5184 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5185 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5186 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5187 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5188
5189 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5190 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5191 }
5192 else
5193 {
5194 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5195 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5196 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5197 }
5198
5199 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5200 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5201}
5202
5203
5204/**
5205 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5206 *
5207 * @param pReNative The native recompile state.
5208 * @param idxHstSimdReg The host SIMD register to update the state for.
5209 * @param enmLoadSz The load size to set.
5210 */
5211DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5212 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5213{
5214 /* Everything valid already? -> nothing to do. */
5215 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5216 return;
5217
5218 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5219 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5220 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5221 {
5222 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5223 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5224 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5225 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5226 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5227 }
5228}
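
/*
 * Illustrative example of the state transitions above, starting from
 * kIemNativeGstSimdRegLdStSz_Invalid:
 *
 *      iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_Low128);
 *      // enmLoaded is now Low128.
 *      iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_High128);
 *      // enmLoaded is now 256, since both halves are valid.
 *
 * Requesting a half that is already loaded leaves the state unchanged.
 */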
5229
5230
5231static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5232 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5233{
5234 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5235 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5236 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5237 {
5238# ifdef RT_ARCH_ARM64
5239 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5240 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5241# endif
5242
5243 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5244 {
5245 switch (enmLoadSzDst)
5246 {
5247 case kIemNativeGstSimdRegLdStSz_256:
5248 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5249 break;
5250 case kIemNativeGstSimdRegLdStSz_Low128:
5251 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5252 break;
5253 case kIemNativeGstSimdRegLdStSz_High128:
5254 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5255 break;
5256 default:
5257 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5258 }
5259
5260 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5261 }
5262 }
5263 else
5264 {
5265 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5266 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5267 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5268 }
5269
5270 return off;
5271}
5272
5273
5274/**
5275 * Allocates a temporary host SIMD register for keeping a guest
5276 * SIMD register value.
5277 *
5278 * Since we may already have a register holding the guest register value,
5279 * code will be emitted to do the loading if that's not the case. Code may also
5280 * be emitted if we have to free up a register to satisfy the request.
5281 *
5282 * @returns The host register number; throws VBox status code on failure, so no
5283 * need to check the return value.
5284 * @param pReNative The native recompile state.
5285 * @param poff Pointer to the variable with the code buffer
5286 * position. This will be updated if we need to move a
5287 * variable from register to stack in order to satisfy
5288 * the request.
5289 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5290 * @param enmIntendedUse How the caller will be using the host register.
5291 * @param fNoVolatileRegs Set if no volatile register is allowed, clear if any
5292 * register is okay (default). The ASSUMPTION here is
5293 * that the caller has already flushed all volatile
5294 * registers, so this is only applied if we allocate a
5295 * new register.
5296 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5297 */
5298DECL_HIDDEN_THROW(uint8_t)
5299iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5300 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5301 bool fNoVolatileRegs /*= false*/)
5302{
5303 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5304#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5305 AssertMsg( pReNative->idxCurCall == 0
5306 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5307 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5308 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5309 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5310 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5311 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5312#endif
5313#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5314 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5315#endif
5316 uint32_t const fRegMask = !fNoVolatileRegs
5317 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5318 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5319
5320 /*
5321 * First check if the guest register value is already in a host register.
5322 */
5323 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5324 {
5325 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5326 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5327 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5328 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5329
5330 /* It's not supposed to be allocated... */
5331 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5332 {
5333 /*
5334 * If the register will trash the guest shadow copy, try to find a
5335 * completely unused register we can use instead. If that fails,
5336 * we need to disassociate the host reg from the guest reg.
5337 */
5338 /** @todo would be nice to know if preserving the register is in any way helpful. */
5339 /* If the purpose is calculations, try to duplicate the register value as
5340 we'll be clobbering the shadow. */
5341 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5342 && ( ~pReNative->Core.bmHstSimdRegs
5343 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5344 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5345 {
5346 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5347
5348 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5349
5350 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5351 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5352 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5353 idxSimdReg = idxRegNew;
5354 }
5355 /* If the current register matches the restrictions, go ahead and allocate
5356 it for the caller. */
5357 else if (fRegMask & RT_BIT_32(idxSimdReg))
5358 {
5359 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5360 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5361 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5362 {
5363 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5364 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5365 else
5366 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5367 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5368 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5369 }
5370 else
5371 {
5372 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5373 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5374 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5375 }
5376 }
5377 /* Otherwise, allocate a register that satisfies the caller and transfer
5378 the shadowing if compatible with the intended use. (This basically
5379 means the caller wants a non-volatile register (RSP push/pop scenario).) */
5380 else
5381 {
5382 Assert(fNoVolatileRegs);
5383 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5384 !fNoVolatileRegs
5385 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5386 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5387 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5388 {
5389 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5390 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5391 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5392 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5393 }
5394 else
5395 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5396 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5397 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5398 idxSimdReg = idxRegNew;
5399 }
5400 }
5401 else
5402 {
5403 /*
5404 * Oops. Shadowed guest register already allocated!
5405 *
5406 * Allocate a new register, copy the value and, if updating, the
5407 * guest shadow copy assignment to the new register.
5408 */
5409 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5410 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5411 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5412 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5413
5414 /** @todo share register for readonly access. */
5415 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5416 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5417
5418 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5419 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5420 else
5421 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5422
5423 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5424 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5425 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5426 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5427 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5428 else
5429 {
5430 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5431 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5432 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5433 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5434 }
5435 idxSimdReg = idxRegNew;
5436 }
5437 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5438
5439#ifdef VBOX_STRICT
5440 /* Strict builds: Check that the value is correct. */
5441 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5442 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5443#endif
5444
5445 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5446 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5447 {
5448# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5449 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5450 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5451# endif
5452
5453 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5454 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5455 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5456 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5457 else
5458 {
5459 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5460 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5461 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5462 }
5463 }
5464
5465 return idxSimdReg;
5466 }
5467
5468 /*
5469 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5470 */
5471 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5472
5473 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5474 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5475 else
5476 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5477
5478 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5479 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5480
5481 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5482 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5483 {
5484# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5485 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5486 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5487# endif
5488
5489 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5490 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5491 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5492 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5493 else
5494 {
5495 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5496 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5497 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5498 }
5499 }
5500
5501 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5502 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5503
5504 return idxRegNew;
5505}
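
/*
 * Illustrative usage sketch: an instruction updating the low 128 bits of the first
 * guest SIMD register would typically do
 *
 *      uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                         IEMNATIVEGSTSIMDREG_SIMD(0),
 *                                                                         kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                         kIemNativeGstRegUse_ForUpdate);
 *      // ... emit the modification of idxSimdReg ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
 *
 * With ForUpdate/ForFullWrite the dirty bits are set above, so the actual write back
 * to CPUMCTX happens later via iemNativeSimdRegFlushPendingWrite() or
 * iemNativeSimdRegFlushDirtyGuest().
 */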
5506
5507
5508/**
5509 * Flushes guest SIMD register shadow copies held by a set of host registers.
5510 *
5511 * This is used when calling an external helper to ensure that we don't carry on
5512 * with any guest shadows in volatile registers, as these will get corrupted by the callee.
5513 *
5514 * @param pReNative The native recompile state.
5515 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5516 */
5517DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5518{
5519 /*
5520 * Reduce the mask by what's currently shadowed.
5521 */
5522 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5523 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5524 if (fHstSimdRegs)
5525 {
5526 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5527 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5528 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5529 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
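 /* If other host SIMD registers still hold shadows, we only clear the flushed
    registers' fGstRegShadows bits from the global bitmap (partial case below); once
    no register shadows anything anymore, the global bitmap can simply be zeroed. */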
5530 if (bmHstSimdRegsWithGstShadowNew)
5531 {
5532 /*
5533 * Partial (likely).
5534 */
5535 uint64_t fGstShadows = 0;
5536 do
5537 {
5538 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5539 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5540 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5541 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5542 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5543 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5544
5545 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5546 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5547 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5548 } while (fHstSimdRegs != 0);
5549 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5550 }
5551 else
5552 {
5553 /*
5554 * Clear all.
5555 */
5556 do
5557 {
5558 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5559 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5560 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5561 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5562 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5563 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5564
5565 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5566 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5567 } while (fHstSimdRegs != 0);
5568 pReNative->Core.bmGstSimdRegShadows = 0;
5569 }
5570 }
5571}
5572#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5573
5574
5575
5576/*********************************************************************************************************************************
5577* Code emitters for flushing pending guest register writes and sanity checks *
5578*********************************************************************************************************************************/
5579
5580#ifdef VBOX_STRICT
5581/**
5582 * Does internal register allocator sanity checks.
5583 */
5584DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5585{
5586 /*
5587 * Iterate host registers building a guest shadowing set.
5588 */
5589 uint64_t bmGstRegShadows = 0;
5590 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5591 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5592 while (bmHstRegsWithGstShadow)
5593 {
5594 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5595 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5596 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5597
5598 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5599 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5600 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5601 bmGstRegShadows |= fThisGstRegShadows;
5602 while (fThisGstRegShadows)
5603 {
5604 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5605 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5606 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5607 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5608 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5609 }
5610 }
5611 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5612 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5613 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5614
5615 /*
5616 * Now the other way around, checking the guest to host index array.
5617 */
5618 bmHstRegsWithGstShadow = 0;
5619 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5620 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5621 while (bmGstRegShadows)
5622 {
5623 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5624 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5625 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5626
5627 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5628 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5629 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5630 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5631 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5632 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5633 }
5634 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5635 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5636 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5637}
5638#endif /* VBOX_STRICT */
5639
5640
5641/**
5642 * Flushes any delayed guest register writes.
5643 *
5644 * This must be called prior to calling CImpl functions and any helpers that use
5645 * the guest state (like raising exceptions) and such.
5646 *
5647 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5648 * the caller if it wishes to do so.
5649 */
5650DECL_HIDDEN_THROW(uint32_t)
5651iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5652{
5653#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5654 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc))) /* fGstShwExcept is a bit mask, so test the PC bit rather than the enum value. */
5655 off = iemNativeEmitPcWriteback(pReNative, off);
5656#else
5657 RT_NOREF(pReNative, fGstShwExcept);
5658#endif
5659
5660#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5661 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5662#endif
5663
5664#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5665 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5666#endif
5667
5668 return off;
5669}
5670
5671
5672#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5673/**
5674 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5675 */
5676DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5677{
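 /* Adds Core.offPc to the RIP shadow register (allocated ForUpdate) and stores the result
    back to cpum.GstCtx.rip; in the IEMNATIVE_REG_FIXED_PC_DBG configuration the offset is
    instead applied to the debug PC register and cross-checked against the context value. */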
5678 Assert(pReNative->Core.offPc);
5679# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5680 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5681 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5682# endif
5683
5684# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5685 /* Allocate a temporary PC register. */
5686 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5687
5688 /* Perform the addition and store the result. */
5689 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5690 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5691
5692 /* Free but don't flush the PC register. */
5693 iemNativeRegFreeTmp(pReNative, idxPcReg);
5694# else
5695 /* Compare the shadow with the context value, they should match. */
5696 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5697 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5698# endif
5699
5700 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5701 pReNative->Core.offPc = 0;
5702 pReNative->Core.cInstrPcUpdateSkipped = 0;
5703
5704 return off;
5705}
5706#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5707
5708
5709/*********************************************************************************************************************************
5710* Code Emitters (larger snippets) *
5711*********************************************************************************************************************************/
5712
5713/**
5714 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5715 * extending to 64-bit width.
5716 *
5717 * @returns New code buffer offset on success, UINT32_MAX on failure.
5718 * @param pReNative The native recompile state.
5719 * @param off The current code buffer position.
5720 * @param idxHstReg The host register to load the guest register value into.
5721 * @param enmGstReg The guest register to load.
5722 *
5723 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5724 * that is something the caller needs to do if applicable.
5725 */
5726DECL_HIDDEN_THROW(uint32_t)
5727iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5728{
5729 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5730 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5731
5732 switch (g_aGstShadowInfo[enmGstReg].cb)
5733 {
5734 case sizeof(uint64_t):
5735 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5736 case sizeof(uint32_t):
5737 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5738 case sizeof(uint16_t):
5739 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5740#if 0 /* not present in the table. */
5741 case sizeof(uint8_t):
5742 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5743#endif
5744 default:
5745 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5746 }
5747}
5748
5749
5750#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5751/**
5752 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5753 *
5754 * @returns New code buffer offset on success, UINT32_MAX on failure.
5755 * @param pReNative The recompiler state.
5756 * @param off The current code buffer position.
5757 * @param idxHstSimdReg The host register to load the guest register value into.
5758 * @param enmGstSimdReg The guest register to load.
5759 * @param enmLoadSz The load size of the register.
5760 *
5761 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5762 * that is something the caller needs to do if applicable.
5763 */
5764DECL_HIDDEN_THROW(uint32_t)
5765iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5766 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5767{
5768 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5769
5770 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5771 switch (enmLoadSz)
5772 {
5773 case kIemNativeGstSimdRegLdStSz_256:
5774 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5775 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5776 case kIemNativeGstSimdRegLdStSz_Low128:
5777 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5778 case kIemNativeGstSimdRegLdStSz_High128:
5779 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5780 default:
5781 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5782 }
5783}
5784#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5785
5786#ifdef VBOX_STRICT
5787
5788/**
5789 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5790 *
5791 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5792 * Trashes EFLAGS on AMD64.
5793 */
5794DECL_HIDDEN_THROW(uint32_t)
5795iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5796{
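 /* Emitted check, roughly: on AMD64 we rotate the register left by 32 so the upper half
    lands in the low dword, TEST it against 0xffffffff and INT3 if any bit is set, then
    rotate back; on ARM64 we shift the upper 32 bits into TMP0 and BRK if the result is
    non-zero.  The register value is preserved either way. */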
5797# ifdef RT_ARCH_AMD64
5798 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5799
5800 /* rol reg64, 32 */
5801 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5802 pbCodeBuf[off++] = 0xc1;
5803 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5804 pbCodeBuf[off++] = 32;
5805
5806 /* test reg32, ffffffffh */
5807 if (idxReg >= 8)
5808 pbCodeBuf[off++] = X86_OP_REX_B;
5809 pbCodeBuf[off++] = 0xf7;
5810 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5811 pbCodeBuf[off++] = 0xff;
5812 pbCodeBuf[off++] = 0xff;
5813 pbCodeBuf[off++] = 0xff;
5814 pbCodeBuf[off++] = 0xff;
5815
5816 /* je/jz +1 */
5817 pbCodeBuf[off++] = 0x74;
5818 pbCodeBuf[off++] = 0x01;
5819
5820 /* int3 */
5821 pbCodeBuf[off++] = 0xcc;
5822
5823 /* rol reg64, 32 */
5824 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5825 pbCodeBuf[off++] = 0xc1;
5826 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5827 pbCodeBuf[off++] = 32;
5828
5829# elif defined(RT_ARCH_ARM64)
5830 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5831 /* lsr tmp0, reg64, #32 */
5832 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5833 /* cbz tmp0, +1 */
5834 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5835 /* brk #0x1100 */
5836 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5837
5838# else
5839# error "Port me!"
5840# endif
5841 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5842 return off;
5843}
5844
5845
5846/**
5847 * Emits code that checks that the content of register @a idxReg is the same
5848 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5849 * instruction if that's not the case.
5850 *
5851 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5852 * Trashes EFLAGS on AMD64.
5853 */
5854DECL_HIDDEN_THROW(uint32_t)
5855iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5856{
5857#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5858 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5859 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
5860 return off;
5861#endif
5862
5863# ifdef RT_ARCH_AMD64
5864 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5865
5866 /* cmp reg, [mem] */
5867 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5868 {
5869 if (idxReg >= 8)
5870 pbCodeBuf[off++] = X86_OP_REX_R;
5871 pbCodeBuf[off++] = 0x38;
5872 }
5873 else
5874 {
5875 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5876 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5877 else
5878 {
5879 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5880 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5881 else
5882 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5883 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5884 if (idxReg >= 8)
5885 pbCodeBuf[off++] = X86_OP_REX_R;
5886 }
5887 pbCodeBuf[off++] = 0x39;
5888 }
5889 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5890
5891 /* je/jz +1 */
5892 pbCodeBuf[off++] = 0x74;
5893 pbCodeBuf[off++] = 0x01;
5894
5895 /* int3 */
5896 pbCodeBuf[off++] = 0xcc;
5897
5898 /* For values smaller than the register size, we must check that the rest
5899 of the register is all zeros. */
5900 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5901 {
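 /* The immediate assembled below is 0xffffff00 for 8-bit and 0xffff0000 for 16-bit guest
    registers; with REX.W it is sign-extended, so the TEST covers every bit from the guest
    register width up to bit 63. */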
5902 /* test reg64, imm32 */
5903 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5904 pbCodeBuf[off++] = 0xf7;
5905 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5906 pbCodeBuf[off++] = 0;
5907 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5908 pbCodeBuf[off++] = 0xff;
5909 pbCodeBuf[off++] = 0xff;
5910
5911 /* je/jz +1 */
5912 pbCodeBuf[off++] = 0x74;
5913 pbCodeBuf[off++] = 0x01;
5914
5915 /* int3 */
5916 pbCodeBuf[off++] = 0xcc;
5917 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5918 }
5919 else
5920 {
5921 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5922 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5923 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5924 }
5925
5926# elif defined(RT_ARCH_ARM64)
5927 /* mov TMP0, [gstreg] */
5928 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5929
5930 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5931 /* sub tmp0, tmp0, idxReg */
5932 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5933 /* cbz tmp0, +1 */
5934 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5935 /* brk #0x1000+enmGstReg */
5936 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5937 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5938
5939# else
5940# error "Port me!"
5941# endif
5942 return off;
5943}
5944
5945
5946# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5947# ifdef RT_ARCH_AMD64
5948/**
5949 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
5950 */
5951DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
5952{
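 /* The caller has copied the SIMD register to check into vectmp0.  PCMPEQQ turns each
    matching 64-bit lane into all-ones, so after extracting each lane with PEXTRQ we
    expect 0xffffffffffffffff; anything else triggers the INT3. */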
5953 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5954 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5955 if (idxSimdReg >= 8)
5956 pbCodeBuf[off++] = X86_OP_REX_R;
5957 pbCodeBuf[off++] = 0x0f;
5958 pbCodeBuf[off++] = 0x38;
5959 pbCodeBuf[off++] = 0x29;
5960 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
5961
5962 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
5963 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5964 pbCodeBuf[off++] = X86_OP_REX_W
5965 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5966 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5967 pbCodeBuf[off++] = 0x0f;
5968 pbCodeBuf[off++] = 0x3a;
5969 pbCodeBuf[off++] = 0x16;
5970 pbCodeBuf[off++] = 0xeb;
5971 pbCodeBuf[off++] = 0x00;
5972
5973 /* cmp tmp0, 0xffffffffffffffff. */
5974 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5975 pbCodeBuf[off++] = 0x83;
5976 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5977 pbCodeBuf[off++] = 0xff;
5978
5979 /* je/jz +1 */
5980 pbCodeBuf[off++] = 0x74;
5981 pbCodeBuf[off++] = 0x01;
5982
5983 /* int3 */
5984 pbCodeBuf[off++] = 0xcc;
5985
5986 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
5987 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5988 pbCodeBuf[off++] = X86_OP_REX_W
5989 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5990 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5991 pbCodeBuf[off++] = 0x0f;
5992 pbCodeBuf[off++] = 0x3a;
5993 pbCodeBuf[off++] = 0x16;
5994 pbCodeBuf[off++] = 0xeb;
5995 pbCodeBuf[off++] = 0x01;
5996
5997 /* cmp tmp0, 0xffffffffffffffff. */
5998 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5999 pbCodeBuf[off++] = 0x83;
6000 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6001 pbCodeBuf[off++] = 0xff;
6002
6003 /* je/jz +1 */
6004 pbCodeBuf[off++] = 0x74;
6005 pbCodeBuf[off++] = 0x01;
6006
6007 /* int3 */
6008 pbCodeBuf[off++] = 0xcc;
6009
6010 return off;
6011}
6012# endif
6013
6014
6015/**
6016 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
6017 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6018 * instruction if that's not the case.
6019 *
6020 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6021 * Trashes EFLAGS on AMD64.
6022 */
6023DECL_HIDDEN_THROW(uint32_t)
6024iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6025 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6026{
6027 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6028 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6029 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6030 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6031 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6032 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6033 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6034 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6035 return off;
6036
6037# ifdef RT_ARCH_AMD64
6038 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6039 {
6040 /* movdqa vectmp0, idxSimdReg */
6041 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6042
6043 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6044
6045 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6046 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6047 }
6048
6049 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6050 {
6051 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6052 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6053
6054 /* vextracti128 vectmp0, idxSimdReg, 1 */
6055 pbCodeBuf[off++] = X86_OP_VEX3;
6056 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6057 | X86_OP_VEX3_BYTE1_X
6058 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6059 | 0x03; /* Opcode map */
6060 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6061 pbCodeBuf[off++] = 0x39;
6062 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6063 pbCodeBuf[off++] = 0x01;
6064
6065 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6066 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6067 }
6068# elif defined(RT_ARCH_ARM64)
6069 /* mov vectmp0, [gstreg] */
6070 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6071
6072 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6073 {
6074 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6075 /* eor vectmp0, vectmp0, idxSimdReg */
6076 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6077 /* uaddlv vectmp0, vectmp0.16B */
6078 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6079 /* umov tmp0, vectmp0.H[0] */
6080 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6081 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6082 /* cbz tmp0, +1 */
6083 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6084 /* brk #0x1000+enmGstReg */
6085 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6086 }
6087
6088 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6089 {
6090 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6091 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6092 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6093 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6094 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6095 /* umov tmp0, (vectmp0 + 1).H[0] */
6096 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6097 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6098 /* cbz tmp0, +1 */
6099 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6100 /* brk #0x1000+enmGstReg */
6101 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6102 }
6103
6104# else
6105# error "Port me!"
6106# endif
6107
6108 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6109 return off;
6110}
6111# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6112
6113
6114/**
6115 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
6116 * important bits.
6117 *
6118 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6119 * Trashes EFLAGS on AMD64.
6120 */
6121DECL_HIDDEN_THROW(uint32_t)
6122iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6123{
6124 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6125 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6126 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6127 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6128
6129#ifdef RT_ARCH_AMD64
6130 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6131
6132 /* je/jz +1 */
6133 pbCodeBuf[off++] = 0x74;
6134 pbCodeBuf[off++] = 0x01;
6135
6136 /* int3 */
6137 pbCodeBuf[off++] = 0xcc;
6138
6139# elif defined(RT_ARCH_ARM64)
6140 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6141
6142 /* b.eq +1 */
6143 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6144 /* brk #0x2000 */
6145 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6146
6147# else
6148# error "Port me!"
6149# endif
6150 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6151
6152 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6153 return off;
6154}
6155
6156#endif /* VBOX_STRICT */
6157
6158
6159#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6160/**
6161 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6162 */
6163DECL_HIDDEN_THROW(uint32_t)
6164iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6165{
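 /* Presumably iem.s.fSkippingEFlags tracks which status flags earlier instructions were
    allowed to skip updating; if any flag needed here is still marked as skipped, the
    TEST + breakpoint emitted below fires so the inconsistency is caught early. */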
6166 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6167
6168 fEflNeeded &= X86_EFL_STATUS_BITS;
6169 if (fEflNeeded)
6170 {
6171# ifdef RT_ARCH_AMD64
6172 /* test dword [pVCpu + offVCpu], imm32 */
6173 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
6174 if (fEflNeeded <= 0xff)
6175 {
6176 pCodeBuf[off++] = 0xf6;
6177 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6178 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6179 }
6180 else
6181 {
6182 pCodeBuf[off++] = 0xf7;
6183 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6184 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6185 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6186 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6187 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6188 }
6189
6190 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 3, kIemNativeInstrCond_e);
6191 pCodeBuf[off++] = 0xcc;
6192
6193 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6194
6195# else
6196 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6197 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6198 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6199# ifdef RT_ARCH_ARM64
6200 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6201 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6202# else
6203# error "Port me!"
6204# endif
6205 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6206# endif
6207 }
6208 return off;
6209}
6210#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6211
6212
6213/**
6214 * Emits code that checks the return code of a call and rcPassUp, returning
6215 * from the code if either is non-zero.
6216 */
6217DECL_HIDDEN_THROW(uint32_t)
6218iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6219{
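 /* Both paths below OR the call status together with iem.s.rcPassUp so a single
    conditional branch covers "either is non-zero"; the combined value is only used for
    the test, the actual status juggling happens in the NonZeroRetOrPassUp tail code. */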
6220#ifdef RT_ARCH_AMD64
6221 /*
6222 * AMD64: eax = call status code.
6223 */
6224
6225 /* edx = rcPassUp */
6226 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6227# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6228 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6229# endif
6230
6231 /* edx = eax | rcPassUp */
6232 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6233 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6234 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6235 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6236
6237 /* Jump to non-zero status return path. */
6238 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeExitReason_NonZeroRetOrPassUp);
6239
6240 /* done. */
6241
6242#elif RT_ARCH_ARM64
6243 /*
6244 * ARM64: w0 = call status code.
6245 */
6246# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6247 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6248# endif
6249 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6250
6251 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6252
6253 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6254
6255 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pu32CodeBuf, off, ARMV8_A64_REG_X4, true /*f64Bit*/,
6256 kIemNativeExitReason_NonZeroRetOrPassUp);
6257
6258#else
6259# error "port me"
6260#endif
6261 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6262 RT_NOREF_PV(idxInstr);
6263 return off;
6264}
6265
6266
6267/**
6268 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6269 * raising a \#GP(0) if it isn't.
6270 *
6271 * @returns New code buffer offset, UINT32_MAX on failure.
6272 * @param pReNative The native recompile state.
6273 * @param off The code buffer offset.
6274 * @param idxAddrReg The host register with the address to check.
6275 * @param idxInstr The current instruction.
6276 */
6277DECL_HIDDEN_THROW(uint32_t)
6278iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6279{
6280 /*
6281 * Make sure we don't have any outstanding guest register writes as we may
6282 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6283 */
6284 off = iemNativeRegFlushPendingWrites(pReNative, off);
6285
6286#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6287 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6288#else
6289 RT_NOREF(idxInstr);
6290#endif
6291
6292#ifdef RT_ARCH_AMD64
6293 /*
6294 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6295 * return raisexcpt();
6296 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
6297 */
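 /* Rough shape of the sequence emitted below:
  *    mov   tmp, addr_reg
  *    shr   tmp, 32
  *    add   tmp32, 0x8000
  *    shr   tmp, 16
  *    jnz   .Lraisexcpt        ; via the RaiseGp0 TB exit
  */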
6298 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6299
6300 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6301 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6302 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6303 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6304 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeExitReason_RaiseGp0);
6305
6306 iemNativeRegFreeTmp(pReNative, iTmpReg);
6307
6308#elif defined(RT_ARCH_ARM64)
6309 /*
6310 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6311 * return raisexcpt();
6312 * ----
6313 * mov x1, 0x800000000000
6314 * add x1, x0, x1
6315 * cmp xzr, x1, lsr 48
6316 * b.ne .Lraisexcpt
6317 */
6318 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6319
6320 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6321 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6322 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6323 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeExitReason_RaiseGp0);
6324
6325 iemNativeRegFreeTmp(pReNative, iTmpReg);
6326
6327#else
6328# error "Port me"
6329#endif
6330 return off;
6331}
6332
6333
6334/**
6335 * Emits code to check that the content of @a idxAddrReg is within the limit
6336 * of CS, raising a \#GP(0) if it isn't.
6337 *
6338 * @returns New code buffer offset; throws VBox status code on error.
6339 * @param pReNative The native recompile state.
6340 * @param off The code buffer offset.
6341 * @param idxAddrReg The host register (32-bit) with the address to
6342 * check.
6343 * @param idxInstr The current instruction.
6344 */
6345DECL_HIDDEN_THROW(uint32_t)
6346iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6347 uint8_t idxAddrReg, uint8_t idxInstr)
6348{
6349 /*
6350 * Make sure we don't have any outstanding guest register writes as we may
6351 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
6352 */
6353 off = iemNativeRegFlushPendingWrites(pReNative, off);
6354
6355#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6356 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6357#else
6358 RT_NOREF(idxInstr);
6359#endif
6360
6361 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6362 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6363 kIemNativeGstRegUse_ReadOnly);
6364
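 /* Unsigned 32-bit compare of the address against the CS limit shadow; the JA/above exit
    also catches values that would be negative when treated as signed, so anything beyond
    the limit takes the RaiseGp0 TB exit. */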
6365 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6366 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeExitReason_RaiseGp0);
6367
6368 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6369 return off;
6370}
6371
6372
6373/**
6374 * Emits a call to a CImpl function or something similar.
6375 */
6376DECL_HIDDEN_THROW(uint32_t)
6377iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6378 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6379{
6380 /* Writeback everything. */
6381 off = iemNativeRegFlushPendingWrites(pReNative, off);
6382
6383 /*
6384 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6385 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6386 */
6387 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6388 fGstShwFlush
6389 | RT_BIT_64(kIemNativeGstReg_Pc)
6390 | RT_BIT_64(kIemNativeGstReg_EFlags));
6391 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6392
6393 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6394
6395 /*
6396 * Load the parameters.
6397 */
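 /* With VBOXSTRICTRC returned as a structure on Windows, the call takes a hidden return
    buffer pointer as the first argument, shifting pVCpu/cbInstr/params one register to
    the right and spilling the overflow onto the stack argument area; otherwise the first
    four arguments go straight into the regular argument GPRs. */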
6398#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6399 /* Special handling for the hidden VBOXSTRICTRC return buffer pointer. */
6400 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6401 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6402 if (cAddParams > 0)
6403 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6404 if (cAddParams > 1)
6405 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6406 if (cAddParams > 2)
6407 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6408 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6409
6410#else
6411 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6412 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6413 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6414 if (cAddParams > 0)
6415 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6416 if (cAddParams > 1)
6417 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6418 if (cAddParams > 2)
6419# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6420 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6421# else
6422 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6423# endif
6424#endif
6425
6426 /*
6427 * Make the call.
6428 */
6429 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6430
6431#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6432 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6433#endif
6434
6435 /*
6436 * Check the status code.
6437 */
6438 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6439}
6440
6441
6442/**
6443 * Emits a call to a threaded worker function.
6444 */
6445DECL_HIDDEN_THROW(uint32_t)
6446iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6447{
6448 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6449
6450 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6451 off = iemNativeRegFlushPendingWrites(pReNative, off);
6452
6453 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6454 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6455
6456#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6457 /* The threaded function may throw / long jmp, so set current instruction
6458 number if we're counting. */
6459 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6460#endif
6461
6462 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6463
6464#ifdef RT_ARCH_AMD64
6465 /* Load the parameters and emit the call. */
6466# ifdef RT_OS_WINDOWS
6467# ifndef VBOXSTRICTRC_STRICT_ENABLED
6468 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6469 if (cParams > 0)
6470 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6471 if (cParams > 1)
6472 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6473 if (cParams > 2)
6474 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6475# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6476 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6477 if (cParams > 0)
6478 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6479 if (cParams > 1)
6480 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6481 if (cParams > 2)
6482 {
6483 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6484 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6485 }
6486 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6487# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6488# else
6489 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6490 if (cParams > 0)
6491 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6492 if (cParams > 1)
6493 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6494 if (cParams > 2)
6495 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6496# endif
6497
6498 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6499
6500# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6501 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6502# endif
6503
6504#elif RT_ARCH_ARM64
6505 /*
6506 * ARM64:
6507 */
6508 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6509 if (cParams > 0)
6510 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6511 if (cParams > 1)
6512 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6513 if (cParams > 2)
6514 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6515
6516 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6517
6518#else
6519# error "port me"
6520#endif
6521
6522 /*
6523 * Check the status code.
6524 */
6525 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6526
6527 return off;
6528}
6529
6530#ifdef VBOX_WITH_STATISTICS
6531
6532/**
6533 * Emits code to update the thread call statistics.
6534 */
6535DECL_INLINE_THROW(uint32_t)
6536iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6537{
6538 /*
6539 * Update threaded function stats.
6540 */
6541 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6542 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
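 /* On ARM64 the increment needs two scratch GPRs (address and value) for the
    load/add/store sequence; elsewhere UINT8_MAX is passed instead, presumably because the
    emitter can increment the counter with a memory operand directly (e.g. on AMD64). */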
6543# if defined(RT_ARCH_ARM64)
6544 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6545 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6546 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6547 iemNativeRegFreeTmp(pReNative, idxTmp1);
6548 iemNativeRegFreeTmp(pReNative, idxTmp2);
6549# else
6550 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6551# endif
6552 return off;
6553}
6554
6555
6556/**
6557 * Emits code to update the TB exit reason statistics.
6558 */
6559DECL_INLINE_THROW(uint32_t)
6560iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6561{
6562 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6563 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6564 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6565 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6566 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6567
6568 return off;
6569}
6570
6571#endif /* VBOX_WITH_STATISTICS */
6572
6573/**
6574 * Worker for iemNativeEmitViaLookupDoOne and iemNativeRecompileAttachExecMemChunkCtx.
6575 */
6576static uint32_t
6577iemNativeEmitCoreViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak, uintptr_t pfnHelper)
6578{
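 /* The helper gets pVCpu and either returns NULL, in which case we jump to the
    ReturnBreak code (VINF_IEM_REEXEC_BREAK), or the native entry point of the next TB,
    which we tail-jump to via the return register. */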
6579 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6580 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6581
6582 /* Jump to ReturnBreak if the return register is NULL. */
6583 off = iemNativeEmitTestIfGprIsZeroAndJmpToFixed(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6584 true /*f64Bit*/, offReturnBreak);
6585
6586 /* Okay, continue executing the next TB. */
6587 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6588 return off;
6589}
6590
6591#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6592
6593/**
6594 * Worker for iemNativeEmitReturnBreakViaLookup.
6595 */
6596static uint32_t iemNativeEmitViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak,
6597 IEMNATIVELABELTYPE enmLabel, uintptr_t pfnHelper)
6598{
6599 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
6600 if (idxLabel != UINT32_MAX)
6601 {
6602 iemNativeLabelDefine(pReNative, idxLabel, off);
6603 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, pfnHelper);
6604 }
6605 return off;
6606}
6607
6608
6609/**
6610 * Emits the code at the ReturnBreakViaLookup, ReturnBreakViaLookupWithIrq,
6611 * ReturnBreakViaLookupWithTlb and ReturnBreakViaLookupWithTlbAndIrq labels
6612 * (returns VINF_IEM_REEXEC_BREAK or jumps to the next TB).
6613 */
6614static uint32_t iemNativeEmitReturnBreakViaLookup(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnBreakLabel)
6615{
6616 uint32_t const offReturnBreak = pReNative->paLabels[idxReturnBreakLabel].off;
6617 Assert(offReturnBreak < off);
6618
6619 /*
6620 * The lookup table index is in IEMNATIVE_CALL_ARG1_GREG for all.
6621 * The GCPhysPc is in IEMNATIVE_CALL_ARG2_GREG for ReturnBreakViaLookupWithPc.
6622 */
6623 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookup,
6624 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/>);
6625 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
6626 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/>);
6627 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
6628 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/>);
6629 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
6630 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/>);
6631 return off;
6632}
6633
6634#endif /* !IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
6635
6636/**
6637 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6638 */
6639static uint32_t iemNativeEmitCoreReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6640{
6641 /* set the return status */
6642 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6643}
6644
6645
6646#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6647/**
6648 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6649 */
6650static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6651{
6652 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6653 if (idxLabel != UINT32_MAX)
6654 {
6655 iemNativeLabelDefine(pReNative, idxLabel, off);
6656 /* set the return status */
6657 off = iemNativeEmitCoreReturnWithFlags(pReNative, off);
6658 /* jump back to the return sequence. */
6659 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6660 }
6661 return off;
6662}
6663#endif
6664
6665
6666/**
6667 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6668 */
6669static uint32_t iemNativeEmitCoreReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6670{
6671 /* set the return status */
6672 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6673}
6674
6675
6676#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6677/**
6678 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6679 */
6680static uint32_t iemNativeEmitReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6681{
6682 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreakFF);
6683 if (idxLabel != UINT32_MAX)
6684 {
6685 iemNativeLabelDefine(pReNative, idxLabel, off);
6686 /* set the return status */
6687 off = iemNativeEmitCoreReturnBreakFF(pReNative, off);
6688 /* jump back to the return sequence. */
6689 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6690 }
6691 return off;
6692}
6693#endif
6694
6695
6696/**
6697 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6698 */
6699static uint32_t iemNativeEmitCoreReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6700{
6701 /* set the return status */
6702 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6703}
6704
6705
6706#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6707/**
6708 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6709 */
6710static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6711{
6712 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6713 if (idxLabel != UINT32_MAX)
6714 {
6715 iemNativeLabelDefine(pReNative, idxLabel, off);
6716 /* set the return status */
6717 off = iemNativeEmitCoreReturnBreak(pReNative, off);
6718 /* jump back to the return sequence. */
6719 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6720 }
6721 return off;
6722}
6723#endif
6724
6725
6726/**
6727 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6728 */
6729static uint32_t iemNativeEmitCoreRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6730{
6731 /*
6732 * Generate the rc + rcPassUp fiddling code.
6733 */
6734 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6735#ifdef RT_ARCH_AMD64
6736# ifdef RT_OS_WINDOWS
6737# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6738 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6739# endif
6740 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6741 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6742# else
6743 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6745# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6746 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6747# endif
6748# endif
6749# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6750 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6751# endif
6752
6753#else
6754 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6755 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6756 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6757#endif
6758
6759 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6760 return off;
6761}
6762
6763
6764#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6765/**
6766 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6767 */
6768static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6769{
6770 /*
6771 * Generate the rc + rcPassUp fiddling code if needed.
6772 */
6773 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6774 if (idxLabel != UINT32_MAX)
6775 {
6776 iemNativeLabelDefine(pReNative, idxLabel, off);
6777 off = iemNativeEmitCoreRcFiddling(pReNative, off);
6778 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6779 }
6780 return off;
6781}
6782#endif
6783
6784
6785/**
6786 * Emits a standard epilog.
6787 */
6788static uint32_t iemNativeEmitCoreEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6789{
6790 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6791
6792 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6793
6794 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6795 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6796
6797 /*
6798 * Restore registers and return.
6799 */
6800#ifdef RT_ARCH_AMD64
6801 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6802
6803 /* Reposition rsp at the r15 restore point. */
6804 pbCodeBuf[off++] = X86_OP_REX_W;
6805 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6806 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6807 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6808
6809 /* Pop non-volatile registers and return */
6810 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6811 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6812 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6813 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6814 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6815 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6816 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6817 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6818# ifdef RT_OS_WINDOWS
6819 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6820 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6821# endif
6822 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6823 pbCodeBuf[off++] = 0xc9; /* leave */
6824 pbCodeBuf[off++] = 0xc3; /* ret */
6825 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6826
6827#elif RT_ARCH_ARM64
6828 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6829
6830 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6831 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6832 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6833 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6834 IEMNATIVE_FRAME_VAR_SIZE / 8);
6835 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6836 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6837 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6838 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6839 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6840 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6841 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6842 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6843 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6844 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6845 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6846 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6847
6848 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6849 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6850 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6851 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6852
6853 /* retab / ret */
6854# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6855 if (1)
6856 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6857 else
6858# endif
6859 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6860
6861#else
6862# error "port me"
6863#endif
6864 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6865
6866 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
6867 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6868
6869 return off;
6870}
6871
6872
6873#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6874/**
6875 * Emits a standard epilog.
6876 */
6877static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6878{
6879 /*
6880 * Define label for common return point.
6881 */
6882 *pidxReturnLabel = UINT32_MAX;
6883 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6884 *pidxReturnLabel = idxReturn;
6885
6886 /*
6887 * Emit the code.
6888 */
6889 return iemNativeEmitCoreEpilog(pReNative, off);
6890}
6891#endif
6892
6893
6894#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
6895/**
6896 * Emits a standard prolog.
6897 */
6898static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6899{
6900#ifdef RT_ARCH_AMD64
6901 /*
6902 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6903 * reserving 64 bytes for stack variables plus 4 non-register argument
6904 * slots. Fixed register assignment: xBX = pVCpu;
6905 *
6906 * Since we always do the same register spilling, we can use the same
6907 * unwind description for all the code.
6908 */
6909 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6910 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6911 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6912 pbCodeBuf[off++] = 0x8b;
6913 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6914 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6915 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6916# ifdef RT_OS_WINDOWS
6917 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6918 pbCodeBuf[off++] = 0x8b;
6919 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6920 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6921 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6922# else
6923 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6924 pbCodeBuf[off++] = 0x8b;
6925 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6926# endif
6927 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6928 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6929 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6930 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6931 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6932 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6933 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6934 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6935
6936# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6937 /* Save the frame pointer. */
6938 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6939# endif
6940
6941 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6942 X86_GREG_xSP,
6943 IEMNATIVE_FRAME_ALIGN_SIZE
6944 + IEMNATIVE_FRAME_VAR_SIZE
6945 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6946 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6947 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6948 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6949 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6950
6951#elif RT_ARCH_ARM64
6952 /*
6953 * We set up a stack frame exactly like on x86, only we have to push the
6954 * return address ourselves here. We save all non-volatile registers.
6955 */
6956 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6957
6958 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further as we've been
6959 * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.
6960 * It's definitely the dwarf stepping code, but until it's found it's very tedious to figure out
6961 * whether it's in any way conditional, so we just emit this instruction now and hope for the best... */
6962 /* pacibsp */
6963 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6964# endif
6965
6966 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
6967 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6968 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6969 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6970 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6971 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6972 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6973 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6974 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6975 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6976 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6977 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6978 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6979 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6980 /* Save the BP and LR (ret address) registers at the top of the frame. */
6981 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6982 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6983 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6984 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6985 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6986 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6987
6988 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6989 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6990
6991 /* mov r28, r0 */
6992 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6993 /* mov r27, r1 */
6994 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6995
6996# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6997 /* Save the frame pointer. */
6998 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6999 ARMV8_A64_REG_X2);
7000# endif
7001
7002#else
7003# error "port me"
7004#endif
7005 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7006 return off;
7007}
7008#endif
7009
7010
7011/*********************************************************************************************************************************
7012* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7013*********************************************************************************************************************************/
7014
7015/**
7016 * Internal work that allocates a variable with kind set to
7017 * kIemNativeVarKind_Invalid and no current stack allocation.
7018 *
7019 * The kind will either be set by the caller or later when the variable is first
7020 * assigned a value.
7021 *
7022 * @returns Unpacked index.
7023 * @internal
7024 */
7025static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7026{
7027 Assert(cbType > 0 && cbType <= 64);
7028 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7029 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7030 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7031 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7032 pReNative->Core.aVars[idxVar].cbVar = cbType;
7033 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7034 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7035 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7036 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7037 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7038 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7039 pReNative->Core.aVars[idxVar].u.uValue = 0;
7040#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7041 pReNative->Core.aVars[idxVar].fSimdReg = false;
7042#endif
7043 return idxVar;
7044}
7045
7046
7047/**
7048 * Internal work that allocates an argument variable w/o setting enmKind.
7049 *
7050 * @returns Unpacked index.
7051 * @internal
7052 */
7053static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7054{
7055 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7056 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7057 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7058
7059 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7060 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7061 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7062 return idxVar;
7063}
7064
7065
7066/**
7067 * Gets the stack slot for a stack variable, allocating one if necessary.
7068 *
7069 * Calling this function implies that the stack slot will contain a valid
7070 * variable value. The caller deals with any register currently assigned to the
7071 * variable, typically by spilling it into the stack slot.
7072 *
7073 * @returns The stack slot number.
7074 * @param pReNative The recompiler state.
7075 * @param idxVar The variable.
7076 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7077 */
7078DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7079{
7080 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7081 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7082 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7083
7084 /* Already got a slot? */
7085 uint8_t const idxStackSlot = pVar->idxStackSlot;
7086 if (idxStackSlot != UINT8_MAX)
7087 {
7088 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7089 return idxStackSlot;
7090 }
7091
7092 /*
7093 * A single slot is easy to allocate.
7094 * Allocate them from the top end, closest to BP, to reduce the displacement.
7095 */
7096 if (pVar->cbVar <= sizeof(uint64_t))
7097 {
7098 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7099 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7100 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7101 pVar->idxStackSlot = (uint8_t)iSlot;
7102 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7103 return (uint8_t)iSlot;
7104 }
7105
7106 /*
7107 * We need more than one stack slot.
7108 *
7109 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7110 */
7111 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7112 Assert(pVar->cbVar <= 64);
7113 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7114 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7115 uint32_t bmStack = pReNative->Core.bmStack;
7116 while (bmStack != UINT32_MAX)
7117 {
7118 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7119 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7120 iSlot = (iSlot - 1) & ~fBitAlignMask;
7121 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7122 {
7123 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7124 pVar->idxStackSlot = (uint8_t)iSlot;
7125 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7126 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7127 return (uint8_t)iSlot;
7128 }
7129
7130 bmStack |= (fBitAllocMask << iSlot);
7131 }
7132 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7133}
7134
7135
7136/**
7137 * Changes the variable to a stack variable.
7138 *
7139 * Currently this is only possible to do the first time the variable is used;
7140 * switching later can be implemented but hasn't been done.
7141 *
7142 * @param pReNative The recompiler state.
7143 * @param idxVar The variable.
7144 * @throws VERR_IEM_VAR_IPE_2
7145 */
7146DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7147{
7148 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7149 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7150 if (pVar->enmKind != kIemNativeVarKind_Stack)
7151 {
7152 /* We could in theory transition from immediate to stack as well, but it
7153 would involve the caller doing work storing the value on the stack. So,
7154 till that's required we only allow transition from invalid. */
7155 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7156 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7157 pVar->enmKind = kIemNativeVarKind_Stack;
7158
7159 /* Note! We don't allocate a stack slot here, that's only done when a
7160 slot is actually needed to hold a variable value. */
7161 }
7162}
7163
7164
7165/**
7166 * Sets it to a variable with a constant value.
7167 *
7168 * This does not require stack storage as we know the value and can always
7169 * reload it, unless of course it's referenced.
7170 *
7171 * @param pReNative The recompiler state.
7172 * @param idxVar The variable.
7173 * @param uValue The immediate value.
7174 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7175 */
7176DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7177{
7178 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7179 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7180 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7181 {
7182 /* Only simple transitions for now. */
7183 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7184 pVar->enmKind = kIemNativeVarKind_Immediate;
7185 }
7186 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7187
7188 pVar->u.uValue = uValue;
7189 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7190 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7191 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7192}
7193
7194
7195/**
7196 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7197 *
7198 * This does not require stack storage as we know the value and can always
7199 * reload it. Loading is postponed till needed.
7200 *
7201 * @param pReNative The recompiler state.
7202 * @param idxVar The variable. Unpacked.
7203 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7204 *
7205 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7206 * @internal
7207 */
7208static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7209{
7210 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7211 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7212
7213 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7214 {
7215 /* Only simple transitions for now. */
7216 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7217 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7218 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7219 }
7220 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7221
7222 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7223
7224 /* Update the other variable, ensure it's a stack variable. */
7225 /** @todo handle variables with const values... that'll go boom now. */
7226 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7227 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7228}
7229
7230
7231/**
7232 * Sets the variable to a reference (pointer) to a guest register reference.
7233 *
7234 * This does not require stack storage as we know the value and can always
7235 * reload it. Loading is postponed till needed.
7236 *
7237 * @param pReNative The recompiler state.
7238 * @param idxVar The variable.
7239 * @param enmRegClass The class guest registers to reference.
7240 * @param idxReg The register within @a enmRegClass to reference.
7241 *
7242 * @throws VERR_IEM_VAR_IPE_2
7243 */
7244DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7245 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7246{
7247 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7248 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7249
7250 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7251 {
7252 /* Only simple transitions for now. */
7253 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7254 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7255 }
7256 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7257
7258 pVar->u.GstRegRef.enmClass = enmRegClass;
7259 pVar->u.GstRegRef.idx = idxReg;
7260}
7261
7262
7263DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7264{
7265 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7266}
7267
7268
7269DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7270{
7271 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7272
7273 /* Since we're using a generic uint64_t value type, we must truncate it if
7274 the variable is smaller, otherwise we may end up with too large a value
7275 when scaling up an imm8 w/ sign-extension.
7276
7277 This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7278 in the bios, bx=1) when running on arm, because clang expects 16-bit
7279 register parameters to have bits 16 and up set to zero. Instead of
7280 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
7281 CF value in the result. */
7282 switch (cbType)
7283 {
7284 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7285 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7286 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7287 }
7288 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7289 return idxVar;
7290}
7291
7292
7293DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7294{
7295 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7296 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7297 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7298 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7299 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7300 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7301
7302 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7303 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7304 return idxArgVar;
7305}
7306
7307
7308DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7309{
7310 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7311 /* Don't set to stack now, leave that to the first use as for instance
7312 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7313 return idxVar;
7314}
7315
7316
7317DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7318{
7319 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7320
7321 /* Since we're using a generic uint64_t value type, we must truncate it if
7322 the variable is smaller, otherwise we may end up with too large a value
7323 when scaling up an imm8 w/ sign-extension. */
7324 switch (cbType)
7325 {
7326 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7327 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7328 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7329 }
7330 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7331 return idxVar;
7332}
7333
7334
7335DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
7336{
7337 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7338 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7339
7340 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
7341 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7342
7343 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7344
7345 /* Truncate the value to this variable's size. */
7346 switch (cbType)
7347 {
7348 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7349 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7350 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7351 }
7352
7353 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7354 iemNativeVarRegisterRelease(pReNative, idxVar);
7355 return idxVar;
7356}
7357
7358
7359/**
7360 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7361 * fixed till we call iemNativeVarRegisterRelease.
7362 *
7363 * @returns The host register number.
7364 * @param pReNative The recompiler state.
7365 * @param idxVar The variable.
7366 * @param poff Pointer to the instruction buffer offset.
7367 * In case a register needs to be freed up or the value
7368 * loaded off the stack.
7369 * @param fInitialized Set if the variable must already have been initialized.
7370 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7371 * the case.
7372 * @param idxRegPref Preferred register number or UINT8_MAX.
7373 */
7374DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7375 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7376{
7377 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7378 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7379 Assert(pVar->cbVar <= 8);
7380 Assert(!pVar->fRegAcquired);
7381
7382 uint8_t idxReg = pVar->idxReg;
7383 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7384 {
7385 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7386 && pVar->enmKind < kIemNativeVarKind_End);
7387 pVar->fRegAcquired = true;
7388 return idxReg;
7389 }
7390
7391 /*
7392 * If the kind of variable has not yet been set, default to 'stack'.
7393 */
7394 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7395 && pVar->enmKind < kIemNativeVarKind_End);
7396 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7397 iemNativeVarSetKindToStack(pReNative, idxVar);
7398
7399 /*
7400 * We have to allocate a register for the variable, even if it's a stack one,
7401 * as we don't know whether modifications are being made to it before it's
7402 * finalized (todo: analyze and insert hints about that?).
7403 *
7404 * If we can, we try to get the correct register for argument variables. This
7405 * assumes that most argument variables are fetched as close as possible
7406 * to the actual call, so that there aren't any interfering hidden calls
7407 * (memory accesses, etc.) in between.
7408 *
7409 * If we cannot, or it's a (non-argument) variable, we make sure no argument
7410 * registers that will be used by this MC block are allocated here, and we
7411 * always prefer non-volatile registers to avoid having to spill stuff for
7412 * internal calls.
7413 */
7414 /** @todo Detect too early argument value fetches and warn about hidden
7415 * calls causing less optimal code to be generated in the python script. */
7416
7417 uint8_t const uArgNo = pVar->uArgNo;
7418 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7419 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7420 {
7421 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7422
7423#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7424 /* Writeback any dirty shadow registers we are about to unshadow. */
7425 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7426#endif
7427
7428 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7429 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7430 }
7431 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7432 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7433 {
7434 /** @todo there must be a better way for this and boot cArgsX? */
7435 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7436 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7437 & ~pReNative->Core.bmHstRegsWithGstShadow
7438 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7439 & fNotArgsMask;
7440 if (fRegs)
7441 {
7442 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7443 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7444 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7445 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7446 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7447 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7448 }
7449 else
7450 {
7451 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7452 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7453 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7454 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7455 }
7456 }
7457 else
7458 {
7459 idxReg = idxRegPref;
7460 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7461 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7462 }
7463 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7464 pVar->idxReg = idxReg;
7465
7466#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7467 pVar->fSimdReg = false;
7468#endif
7469
7470 /*
7471 * Load it off the stack if we've got a stack slot.
7472 */
7473 uint8_t const idxStackSlot = pVar->idxStackSlot;
7474 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7475 {
7476 Assert(fInitialized);
7477 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7478 switch (pVar->cbVar)
7479 {
7480 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7481 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7482 case 3: AssertFailed(); RT_FALL_THRU();
7483 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7484 default: AssertFailed(); RT_FALL_THRU();
7485 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7486 }
7487 }
7488 else
7489 {
7490 Assert(idxStackSlot == UINT8_MAX);
7491 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7492 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7493 else
7494 {
7495 /*
7496 * Convert from immediate to stack/register. This is currently only
7497 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7498 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7499 */
7500 AssertStmt(fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7501 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7502 idxVar, idxReg, pVar->u.uValue));
7503 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7504 pVar->enmKind = kIemNativeVarKind_Stack;
7505 }
7506 }
7507
7508 pVar->fRegAcquired = true;
7509 return idxReg;
7510}
7511
7512
7513#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7514/**
7515 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7516 * fixed till we call iemNativeVarRegisterRelease.
7517 *
7518 * @returns The host register number.
7519 * @param pReNative The recompiler state.
7520 * @param idxVar The variable.
7521 * @param poff Pointer to the instruction buffer offset.
7522 * In case a register needs to be freed up or the value
7523 * loaded off the stack.
7524 * @param fInitialized Set if the variable must already have been initialized.
7525 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7526 * the case.
7527 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7528 */
7529DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7530 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7531{
7532 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7533 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7534 Assert( pVar->cbVar == sizeof(RTUINT128U)
7535 || pVar->cbVar == sizeof(RTUINT256U));
7536 Assert(!pVar->fRegAcquired);
7537
7538 uint8_t idxReg = pVar->idxReg;
7539 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7540 {
7541 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7542 && pVar->enmKind < kIemNativeVarKind_End);
7543 pVar->fRegAcquired = true;
7544 return idxReg;
7545 }
7546
7547 /*
7548 * If the kind of variable has not yet been set, default to 'stack'.
7549 */
7550 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7551 && pVar->enmKind < kIemNativeVarKind_End);
7552 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7553 iemNativeVarSetKindToStack(pReNative, idxVar);
7554
7555 /*
7556 * We have to allocate a register for the variable, even if it's a stack one,
7557 * as we don't know whether modifications are being made to it before it's
7558 * finalized (todo: analyze and insert hints about that?).
7559 *
7560 * If we can, we try to get the correct register for argument variables. This
7561 * assumes that most argument variables are fetched as close as possible
7562 * to the actual call, so that there aren't any interfering hidden calls
7563 * (memory accesses, etc.) in between.
7564 *
7565 * If we cannot, or it's a (non-argument) variable, we make sure no argument
7566 * registers that will be used by this MC block are allocated here, and we
7567 * always prefer non-volatile registers to avoid having to spill stuff for
7568 * internal calls.
7569 */
7570 /** @todo Detect too early argument value fetches and warn about hidden
7571 * calls causing less optimal code to be generated in the python script. */
7572
7573 uint8_t const uArgNo = pVar->uArgNo;
7574 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7575
7576 /* SIMD is a bit simpler for now because there is no support for arguments. */
7577 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7578 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7579 {
7580 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7581 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7582 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7583 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7584 & fNotArgsMask;
7585 if (fRegs)
7586 {
7587 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7588 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7589 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7590 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7591 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7592 }
7593 else
7594 {
7595 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7596 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7597 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7598 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7599 }
7600 }
7601 else
7602 {
7603 idxReg = idxRegPref;
7604 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7605 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7606 }
7607 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7608
7609 pVar->fSimdReg = true;
7610 pVar->idxReg = idxReg;
7611
7612 /*
7613 * Load it off the stack if we've got a stack slot.
7614 */
7615 uint8_t const idxStackSlot = pVar->idxStackSlot;
7616 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7617 {
7618 Assert(fInitialized);
7619 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7620 switch (pVar->cbVar)
7621 {
7622 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7623 default: AssertFailed(); RT_FALL_THRU();
7624 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7625 }
7626 }
7627 else
7628 {
7629 Assert(idxStackSlot == UINT8_MAX);
7630 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7631 }
7632 pVar->fRegAcquired = true;
7633 return idxReg;
7634}
7635#endif
7636
7637
7638/**
7639 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7640 * guest register.
7641 *
7642 * This function makes sure there is a register for it and sets it to be the
7643 * current shadow copy of @a enmGstReg.
7644 *
7645 * @returns The host register number.
7646 * @param pReNative The recompiler state.
7647 * @param idxVar The variable.
7648 * @param enmGstReg The guest register this variable will be written to
7649 * after this call.
7650 * @param poff Pointer to the instruction buffer offset.
7651 * In case a register needs to be freed up or if the
7652 * variable content needs to be loaded off the stack.
7653 *
7654 * @note We DO NOT expect @a idxVar to be an argument variable, because
7655 * this function is only ever used in the commit stage of an
7656 * instruction.
7657 */
7658DECL_HIDDEN_THROW(uint8_t)
7659iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7660{
7661 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7662 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7663 Assert(!pVar->fRegAcquired);
7664 AssertMsgStmt( pVar->cbVar <= 8
7665 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7666 || pVar->enmKind == kIemNativeVarKind_Stack),
7667 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7668 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7669 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7670
7671 /*
7672 * This shouldn't ever be used for arguments, unless it's in a weird else
7673 * branch that doesn't do any calling and even then it's questionable.
7674 *
7675 * However, in case someone writes crazy wrong MC code and does register
7676 * updates before making calls, just use the regular register allocator to
7677 * ensure we get a register suitable for the intended argument number.
7678 */
7679 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7680
7681 /*
7682 * If there is already a register for the variable, we transfer/set the
7683 * guest shadow copy assignment to it.
7684 */
7685 uint8_t idxReg = pVar->idxReg;
7686 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7687 {
7688#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7689 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7690 {
7691# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7692 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7693 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7694# endif
7695 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7696 }
7697#endif
7698
7699 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7700 {
7701 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7702 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7703 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7704 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7705 }
7706 else
7707 {
7708 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7709 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7710 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7711 }
7712 /** @todo figure this one out. We need some way of making sure the register isn't
7713 * modified after this point, just in case we start writing crappy MC code. */
7714 pVar->enmGstReg = enmGstReg;
7715 pVar->fRegAcquired = true;
7716 return idxReg;
7717 }
7718 Assert(pVar->uArgNo == UINT8_MAX);
7719
7720 /*
7721 * Because this is supposed to be the commit stage, we just tag along with the
7722 * temporary register allocator and upgrade the register to a variable register.
7723 */
7724 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7725 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7726 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7727 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7728 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7729 pVar->idxReg = idxReg;
7730
7731 /*
7732 * Now we need to load the register value.
7733 */
7734 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7735 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7736 else
7737 {
7738 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7739 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7740 switch (pVar->cbVar)
7741 {
7742 case sizeof(uint64_t):
7743 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7744 break;
7745 case sizeof(uint32_t):
7746 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7747 break;
7748 case sizeof(uint16_t):
7749 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7750 break;
7751 case sizeof(uint8_t):
7752 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7753 break;
7754 default:
7755 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7756 }
7757 }
7758
7759 pVar->fRegAcquired = true;
7760 return idxReg;
7761}
7762
7763
7764/**
7765 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7766 *
7767 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7768 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7769 * requirement of flushing anything in volatile host registers when making a
7770 * call.
7771 *
7772 * @returns New @a off value.
7773 * @param pReNative The recompiler state.
7774 * @param off The code buffer position.
7775 * @param fHstRegsNotToSave Set of registers not to save & restore.
7776 */
7777DECL_HIDDEN_THROW(uint32_t)
7778iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7779{
7780 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7781 if (fHstRegs)
7782 {
7783 do
7784 {
7785 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7786 fHstRegs &= ~RT_BIT_32(idxHstReg);
7787
7788 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7789 {
7790 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7791 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7792 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7793 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7794 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7795 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7796 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7797 {
7798 case kIemNativeVarKind_Stack:
7799 {
7800 /* Temporarily spill the variable register. */
7801 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7802 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7803 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7804 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7805 continue;
7806 }
7807
7808 case kIemNativeVarKind_Immediate:
7809 case kIemNativeVarKind_VarRef:
7810 case kIemNativeVarKind_GstRegRef:
7811 /* It is weird to have any of these loaded at this point. */
7812 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7813 continue;
7814
7815 case kIemNativeVarKind_End:
7816 case kIemNativeVarKind_Invalid:
7817 break;
7818 }
7819 AssertFailed();
7820 }
7821 else
7822 {
7823 /*
7824 * Allocate a temporary stack slot and spill the register to it.
7825 */
7826 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7827 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7828 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7829 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7830 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7831 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7832 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7833 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7834 }
7835 } while (fHstRegs);
7836 }
7837#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7838
7839 /*
7840 * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot
7841 * allocated, which would be more difficult due to spanning multiple stack slots and different
7842 * sizes (besides, we only have a limited amount of slots at the moment).
7843 *
7844 * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted by
7845 * the callee. This asserts that the registers were written back earlier and are not in a dirty state.
7846 */
7847 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7848
7849 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7850 if (fHstRegs)
7851 {
7852 do
7853 {
7854 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7855 fHstRegs &= ~RT_BIT_32(idxHstReg);
7856
7857 /* Fixed reserved and temporary registers don't need saving. */
7858 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7859 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7860 continue;
7861
7862 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7863
7864 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7865 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7866 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7867 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7868 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7869 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7870 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7871 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7872 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7873 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7874 {
7875 case kIemNativeVarKind_Stack:
7876 {
7877 /* Temporarily spill the variable register. */
7878 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7879 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7880 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7881 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7882 if (cbVar == sizeof(RTUINT128U))
7883 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7884 else
7885 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7886 continue;
7887 }
7888
7889 case kIemNativeVarKind_Immediate:
7890 case kIemNativeVarKind_VarRef:
7891 case kIemNativeVarKind_GstRegRef:
7892 /* It is weird to have any of these loaded at this point. */
7893 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7894 continue;
7895
7896 case kIemNativeVarKind_End:
7897 case kIemNativeVarKind_Invalid:
7898 break;
7899 }
7900 AssertFailed();
7901 } while (fHstRegs);
7902 }
7903#endif
7904 return off;
7905}
7906
7907
7908/**
7909 * Emit code to restore volatile registers after a call to a helper.
7910 *
7911 * @returns New @a off value.
7912 * @param pReNative The recompiler state.
7913 * @param off The code buffer position.
7914 * @param fHstRegsNotToSave Set of registers not to save & restore.
7915 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7916 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7917 */
7918DECL_HIDDEN_THROW(uint32_t)
7919iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7920{
7921 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7922 if (fHstRegs)
7923 {
7924 do
7925 {
7926 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7927 fHstRegs &= ~RT_BIT_32(idxHstReg);
7928
7929 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7930 {
7931 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7932 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7933 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7934 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7935 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7936 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7937 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7938 {
7939 case kIemNativeVarKind_Stack:
7940 {
7941 /* Unspill the variable register. */
7942 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7943 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7944 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7945 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7946 continue;
7947 }
7948
7949 case kIemNativeVarKind_Immediate:
7950 case kIemNativeVarKind_VarRef:
7951 case kIemNativeVarKind_GstRegRef:
7952 /* It is weird to have any of these loaded at this point. */
7953 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7954 continue;
7955
7956 case kIemNativeVarKind_End:
7957 case kIemNativeVarKind_Invalid:
7958 break;
7959 }
7960 AssertFailed();
7961 }
7962 else
7963 {
7964 /*
7965 * Restore from temporary stack slot.
7966 */
7967 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7968 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7969 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7970 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7971
7972 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7973 }
7974 } while (fHstRegs);
7975 }
7976#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7977 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7978 if (fHstRegs)
7979 {
7980 do
7981 {
7982 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7983 fHstRegs &= ~RT_BIT_32(idxHstReg);
7984
7985 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7986 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7987 continue;
7988 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7989
7990 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7991 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7992 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7993 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7994 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7995 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7996 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7997 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7998 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7999 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8000 {
8001 case kIemNativeVarKind_Stack:
8002 {
8003 /* Unspill the variable register. */
8004 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8005 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8006 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8007 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8008
8009 if (cbVar == sizeof(RTUINT128U))
8010 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8011 else
8012 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8013 continue;
8014 }
8015
8016 case kIemNativeVarKind_Immediate:
8017 case kIemNativeVarKind_VarRef:
8018 case kIemNativeVarKind_GstRegRef:
8019 /* It is weird to have any of these loaded at this point. */
8020 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8021 continue;
8022
8023 case kIemNativeVarKind_End:
8024 case kIemNativeVarKind_Invalid:
8025 break;
8026 }
8027 AssertFailed();
8028 } while (fHstRegs);
8029 }
8030#endif
8031 return off;
8032}
8033
8034
8035/**
8036 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8037 *
8038 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8039 *
8040 * ASSUMES that @a idxVar is valid and unpacked.
8041 */
8042DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8043{
8044 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8045 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8046 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8047 {
8048 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8049 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8050 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8051 Assert(cSlots > 0);
8052 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8053 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8054 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8055 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8056 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8057 }
8058 else
8059 Assert(idxStackSlot == UINT8_MAX);
8060}
8061
8062
8063/**
8064 * Worker that frees a single variable.
8065 *
8066 * ASSUMES that @a idxVar is valid and unpacked.
8067 */
8068DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8069{
8070 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8071 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8072 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8073
8074 /* Free the host register first if any assigned. */
8075 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8076#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8077 if ( idxHstReg != UINT8_MAX
8078 && pReNative->Core.aVars[idxVar].fSimdReg)
8079 {
8080 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8081 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8082 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8083 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8084 }
8085 else
8086#endif
8087 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8088 {
8089 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8090 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8091 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8092 }
8093
8094 /* Free argument mapping. */
8095 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8096 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8097 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8098
8099 /* Free the stack slots. */
8100 iemNativeVarFreeStackSlots(pReNative, idxVar);
8101
8102 /* Free the actual variable. */
8103 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8104 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8105}
8106
8107
8108/**
8109 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8110 */
8111DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8112{
8113 while (bmVars != 0)
8114 {
8115 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8116 bmVars &= ~RT_BIT_32(idxVar);
8117
8118#if 1 /** @todo optimize by simplifying this later... */
8119 iemNativeVarFreeOneWorker(pReNative, idxVar);
8120#else
8121 /* Only need to free the host register, the rest is done as bulk updates below. */
8122 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8123 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8124 {
8125 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8126 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8127 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8128 }
8129#endif
8130 }
8131#if 0 /** @todo optimize by simplifying this later... */
8132 pReNative->Core.bmVars = 0;
8133 pReNative->Core.bmStack = 0;
8134 pReNative->Core.u64ArgVars = UINT64_MAX;
8135#endif
8136}
8137
8138
8139
8140/*********************************************************************************************************************************
8141* Emitters for IEM_MC_CALL_CIMPL_XXX *
8142*********************************************************************************************************************************/
8143
8144/**
8145 * Emits code to load a reference to the given guest register into @a idxGprDst.
8146 */
8147DECL_HIDDEN_THROW(uint32_t)
8148iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8149 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8150{
8151#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8152 /** @todo If we're ever gonna allow referencing the RIP register we need to update the guest value here. */
8153#endif
8154
8155 /*
8156 * Get the offset relative to the CPUMCTX structure.
8157 */
8158 uint32_t offCpumCtx;
8159 switch (enmClass)
8160 {
8161 case kIemNativeGstRegRef_Gpr:
8162 Assert(idxRegInClass < 16);
8163 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8164 break;
8165
8166 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8167 Assert(idxRegInClass < 4);
8168 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8169 break;
8170
8171 case kIemNativeGstRegRef_EFlags:
8172 Assert(idxRegInClass == 0);
8173 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8174 break;
8175
8176 case kIemNativeGstRegRef_MxCsr:
8177 Assert(idxRegInClass == 0);
8178 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8179 break;
8180
8181 case kIemNativeGstRegRef_FpuReg:
8182 Assert(idxRegInClass < 8);
8183 AssertFailed(); /** @todo what kind of indexing? */
8184 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8185 break;
8186
8187 case kIemNativeGstRegRef_MReg:
8188 Assert(idxRegInClass < 8);
8189 AssertFailed(); /** @todo what kind of indexing? */
8190 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8191 break;
8192
8193 case kIemNativeGstRegRef_XReg:
8194 Assert(idxRegInClass < 16);
8195 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8196 break;
8197
8198 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8199 Assert(idxRegInClass == 0);
8200 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8201 break;
8202
8203 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8204 Assert(idxRegInClass == 0);
8205 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8206 break;
8207
8208 default:
8209 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8210 }
8211
8212 /*
8213 * Load the value into the destination register.
8214 */
8215#ifdef RT_ARCH_AMD64
8216 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8217
8218#elif defined(RT_ARCH_ARM64)
8219 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8220 Assert(offCpumCtx < 4096);
8221 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8222
8223#else
8224# error "Port me!"
8225#endif
8226
8227 return off;
8228}
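/*
 * Illustrative sketch only: one way the emitter above could be used, loading
 * the address of guest GPR 3 (RBX) into the first call argument register.
 * The choice of destination register and the wrapper name are assumptions for
 * the sake of the example; the surrounding emitter state handling is omitted.
 */
#if 0 /* illustrative sketch only */
static uint32_t iemNativeSketchEmitLeaGstRbx(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Emits: IEMNATIVE_CALL_ARG0_GREG = &pVCpu->cpum.GstCtx.aGRegs[3] (RBX). */
    return iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
                                          kIemNativeGstRegRef_Gpr, 3 /*RBX*/);
}
#endif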
8229
8230
8231/**
8232 * Common code for CIMPL and AIMPL calls.
8233 *
8234 * These are calls that use argument variables and such. They should not be
8235 * confused with internal calls required to implement an MC operation,
8236 * like a TLB load and similar.
8237 *
8238 * Upon return all that is left to do is to load any hidden arguments and
8239 * perform the call. All argument variables are freed.
8240 *
8241 * @returns New code buffer offset; throws VBox status code on error.
8242 * @param pReNative The native recompile state.
8243 * @param off The code buffer offset.
8244 * @param cArgs The total number of arguments (includes hidden
8245 * count).
8246 * @param cHiddenArgs The number of hidden arguments. The hidden
8247 * arguments must not have any variable declared for
8248 * them, whereas all the regular arguments must
8249 * (tstIEMCheckMc ensures this).
8250 * @param fFlushPendingWrites Whether to flush pending writes (default: true). Even when
8251 * false, pending writes in call-volatile registers are still flushed.
8252 */
8253DECL_HIDDEN_THROW(uint32_t)
8254iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8255 bool fFlushPendingWrites /*= true*/)
8256{
8257#ifdef VBOX_STRICT
8258 /*
8259 * Assert sanity.
8260 */
8261 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8262 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8263 for (unsigned i = 0; i < cHiddenArgs; i++)
8264 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8265 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8266 {
8267 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8268 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8269 }
8270 iemNativeRegAssertSanity(pReNative);
8271#endif
8272
8273 /* We don't know what the called function makes use of, so flush any pending register writes. */
8274 RT_NOREF(fFlushPendingWrites);
8275#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8276 if (fFlushPendingWrites)
8277#endif
8278 off = iemNativeRegFlushPendingWrites(pReNative, off);
8279
8280 /*
8281 * Before we do anything else, go over variables that are referenced and
8282 * make sure they are not in a register.
8283 */
8284 uint32_t bmVars = pReNative->Core.bmVars;
8285 if (bmVars)
8286 {
8287 do
8288 {
8289 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8290 bmVars &= ~RT_BIT_32(idxVar);
8291
8292 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8293 {
8294 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8295#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8296 if ( idxRegOld != UINT8_MAX
8297 && pReNative->Core.aVars[idxVar].fSimdReg)
8298 {
8299 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8300 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8301
8302 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8303 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8304 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8305 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8306 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8307 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8308 else
8309 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8310
8311 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8312 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8313
8314 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8315 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8316 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8317 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8318 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8319 }
8320 else
8321#endif
8322 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8323 {
8324 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8325 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8326 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8327 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8328 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8329
8330 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8331 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8332 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8333 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8334 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8335 }
8336 }
8337 } while (bmVars != 0);
8338#if 0 //def VBOX_STRICT
8339 iemNativeRegAssertSanity(pReNative);
8340#endif
8341 }
8342
8343 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8344
8345#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8346 /*
8347 * As the very first step, go over the host registers that will be used for arguments
8348 * and make sure they don't shadow anything which needs writing back first.
8349 */
8350 for (uint32_t i = 0; i < cRegArgs; i++)
8351 {
8352 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8353
8354 /* Writeback any dirty guest shadows before using this register. */
8355 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8356 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8357 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8358 }
8359#endif
8360
8361 /*
8362 * First, go over the host registers that will be used for arguments and make
8363 * sure they either hold the desired argument or are free.
8364 */
8365 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8366 {
8367 for (uint32_t i = 0; i < cRegArgs; i++)
8368 {
8369 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8370 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8371 {
8372 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8373 {
8374 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8375 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8376 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8377 Assert(pVar->idxReg == idxArgReg);
8378 uint8_t const uArgNo = pVar->uArgNo;
8379 if (uArgNo == i)
8380 { /* perfect */ }
8381 /* The variable allocator logic should make sure this is impossible,
8382 except when the return register is used as a parameter (ARM,
8383 but not x86). */
8384#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8385 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8386 {
8387# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8388# error "Implement this"
8389# endif
8390 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8391 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8392 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8393 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8394 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8395 }
8396#endif
8397 else
8398 {
8399 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8400
8401 if (pVar->enmKind == kIemNativeVarKind_Stack)
8402 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8403 else
8404 {
8405 /* just free it, can be reloaded if used again */
8406 pVar->idxReg = UINT8_MAX;
8407 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8408 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8409 }
8410 }
8411 }
8412 else
8413 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8414 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8415 }
8416 }
8417#if 0 //def VBOX_STRICT
8418 iemNativeRegAssertSanity(pReNative);
8419#endif
8420 }
8421
8422 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8423
8424#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8425 /*
8426 * If there are any stack arguments, make sure they are in their place as well.
8427 *
8428 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8429 * the caller) will be loading it later and it must be free (see the first loop).
8430 */
8431 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8432 {
8433 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8434 {
8435 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8436 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8437 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8438 {
8439 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8440 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8441 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8442 pVar->idxReg = UINT8_MAX;
8443 }
8444 else
8445 {
8446 /* Use ARG0 as temp for stuff we need registers for. */
8447 switch (pVar->enmKind)
8448 {
8449 case kIemNativeVarKind_Stack:
8450 {
8451 uint8_t const idxStackSlot = pVar->idxStackSlot;
8452 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8453 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8454 iemNativeStackCalcBpDisp(idxStackSlot));
8455 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8456 continue;
8457 }
8458
8459 case kIemNativeVarKind_Immediate:
8460 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8461 continue;
8462
8463 case kIemNativeVarKind_VarRef:
8464 {
8465 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8466 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8467 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8468 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8469 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8470# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8471 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8472 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8473 if ( fSimdReg
8474 && idxRegOther != UINT8_MAX)
8475 {
8476 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8477 if (cbVar == sizeof(RTUINT128U))
8478 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8479 else
8480 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8481 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8482 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8483 }
8484 else
8485# endif
8486 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8487 {
8488 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8489 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8490 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8491 }
8492 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8493 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8494 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8495 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8496 continue;
8497 }
8498
8499 case kIemNativeVarKind_GstRegRef:
8500 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8501 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8502 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8503 continue;
8504
8505 case kIemNativeVarKind_Invalid:
8506 case kIemNativeVarKind_End:
8507 break;
8508 }
8509 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8510 }
8511 }
8512# if 0 //def VBOX_STRICT
8513 iemNativeRegAssertSanity(pReNative);
8514# endif
8515 }
8516#else
8517 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8518#endif
8519
8520 /*
8521 * Make sure the argument variables are loaded into their respective registers.
8522 *
8523 * We can optimize this by ASSUMING that any register allocations are for
8524 * registers that have already been loaded and are ready. The previous step
8525 * saw to that.
8526 */
8527 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8528 {
8529 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8530 {
8531 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8532 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8533 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8534 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8535 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8536 else
8537 {
8538 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8539 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8540 {
8541 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8542 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8543 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8544 | RT_BIT_32(idxArgReg);
8545 pVar->idxReg = idxArgReg;
8546 }
8547 else
8548 {
8549 /* Use ARG0 as temp for stuff we need registers for. */
8550 switch (pVar->enmKind)
8551 {
8552 case kIemNativeVarKind_Stack:
8553 {
8554 uint8_t const idxStackSlot = pVar->idxStackSlot;
8555 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8556 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8557 continue;
8558 }
8559
8560 case kIemNativeVarKind_Immediate:
8561 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8562 continue;
8563
8564 case kIemNativeVarKind_VarRef:
8565 {
8566 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8567 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8568 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8569 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8570 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8571 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8572#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8573 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8574 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8575 if ( fSimdReg
8576 && idxRegOther != UINT8_MAX)
8577 {
8578 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8579 if (cbVar == sizeof(RTUINT128U))
8580 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8581 else
8582 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8583 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8584 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8585 }
8586 else
8587#endif
8588 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8589 {
8590 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8591 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8592 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8593 }
8594 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8595 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8596 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8597 continue;
8598 }
8599
8600 case kIemNativeVarKind_GstRegRef:
8601 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8602 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8603 continue;
8604
8605 case kIemNativeVarKind_Invalid:
8606 case kIemNativeVarKind_End:
8607 break;
8608 }
8609 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8610 }
8611 }
8612 }
8613#if 0 //def VBOX_STRICT
8614 iemNativeRegAssertSanity(pReNative);
8615#endif
8616 }
8617#ifdef VBOX_STRICT
8618 else
8619 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8620 {
8621 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8622 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8623 }
8624#endif
8625
8626 /*
8627 * Free all argument variables (simplified).
8628 * Their lifetime always expires with the call they are for.
8629 */
8630 /** @todo Make the python script check that arguments aren't used after
8631 * IEM_MC_CALL_XXXX. */
8632 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8633 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8634 * an argument value. There is also some FPU stuff. */
8635 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8636 {
8637 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8638 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8639
8640 /* no need to free registers: */
8641 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8642 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8643 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8644 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8645 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8646 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8647
8648 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8649 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8650 iemNativeVarFreeStackSlots(pReNative, idxVar);
8651 }
8652 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8653
8654 /*
8655 * Flush volatile registers as we make the call.
8656 */
8657 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8658
8659 return off;
8660}
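/*
 * Illustrative sketch only: the rough shape of a caller of the helper above,
 * emitting a call with two visible arguments plus one hidden pVCpu argument.
 * The call-emission helper name (iemNativeEmitCallImm) and the exact hidden
 * argument handling are assumptions here; the real CIMPL/AIMPL emitters
 * differ in detail.
 */
#if 0 /* illustrative sketch only */
static uint32_t iemNativeSketchEmitWorkerCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnWorker)
{
    /* cArgs includes the hidden argument (see the parameter docs above). */
    off = iemNativeEmitCallCommon(pReNative, off, 3 /*cArgs*/, 1 /*cHiddenArgs*/);

    /* Load the hidden first argument (pVCpu) and make the call. */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    off = iemNativeEmitCallImm(pReNative, off, pfnWorker); /* assumed helper name */
    return off;
}
#endif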
8661
8662
8663
8664/*********************************************************************************************************************************
8665* TLB Lookup. *
8666*********************************************************************************************************************************/
8667
8668/**
8669 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8670 */
8671DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8672{
8673 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8674 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8675 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8676 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8677
8678 /* Do the lookup manually. */
8679 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8680 uint64_t const uTagNoRev = IEMTLB_CALC_TAG_NO_REV(GCPtrFlat);
8681 PCIEMTLBENTRY pTlbe = IEMTLB_TAG_TO_EVEN_ENTRY(&pVCpu->iem.s.DataTlb, uTagNoRev);
8682 if (RT_LIKELY( pTlbe->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevision)
8683 || (pTlbe = pTlbe + 1)->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevisionGlobal)))
8684 {
8685 /*
8686 * Check TLB page table level access flags.
8687 */
8688 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8689 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8690 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8691 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8692 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8693 | IEMTLBE_F_PG_UNASSIGNED
8694 | IEMTLBE_F_PT_NO_ACCESSED
8695 | fNoWriteNoDirty | fNoUser);
8696 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8697 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8698 {
8699 /*
8700 * Return the address.
8701 */
8702 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8703 if ((uintptr_t)pbAddr == uResult)
8704 return;
8705 RT_NOREF(cbMem);
8706 AssertFailed();
8707 }
8708 else
8709 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8710 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8711 }
8712 else
8713 AssertFailed();
8714 RT_BREAKPOINT();
8715}
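/*
 * A simplified, self-contained model of the even/odd tag check performed in
 * the helper above: the even entry of a pair is matched against the
 * non-global TLB revision and the odd entry against the global one.  The toy
 * structure and function below are assumptions for illustration and do not
 * reflect the real IEMTLB/IEMTLBENTRY layout.
 */
#if 0 /* illustrative sketch only */
typedef struct TOYTLBENTRY { uint64_t uTag; } TOYTLBENTRY;

static bool toyTlbPairMatches(TOYTLBENTRY const *paPair /* [0]=even, [1]=odd */,
                              uint64_t uTagNoRev, uint64_t uTlbRevision, uint64_t uTlbRevisionGlobal)
{
    return paPair[0].uTag == (uTagNoRev | uTlbRevision)        /* even: non-global translation */
        || paPair[1].uTag == (uTagNoRev | uTlbRevisionGlobal); /* odd:  global translation */
}
#endif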
8716
8717/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8718
8719
8720
8721/*********************************************************************************************************************************
8722* Recompiler Core. *
8723*********************************************************************************************************************************/
8724
8725/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8726static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8727{
8728 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8729 pDis->cbCachedInstr += cbMaxRead;
8730 RT_NOREF(cbMinRead);
8731 return VERR_NO_DATA;
8732}
8733
8734
8735DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8736{
8737 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8738 {
8739#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8740 ENTRY(fLocalForcedActions),
8741 ENTRY(iem.s.rcPassUp),
8742 ENTRY(iem.s.fExec),
8743 ENTRY(iem.s.pbInstrBuf),
8744 ENTRY(iem.s.uInstrBufPc),
8745 ENTRY(iem.s.GCPhysInstrBuf),
8746 ENTRY(iem.s.cbInstrBufTotal),
8747 ENTRY(iem.s.idxTbCurInstr),
8748 ENTRY(iem.s.fSkippingEFlags),
8749#ifdef VBOX_WITH_STATISTICS
8750 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8751 ENTRY(iem.s.StatNativeTlbHitsForStore),
8752 ENTRY(iem.s.StatNativeTlbHitsForStack),
8753 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8754 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8755 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8756 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8757 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8758#endif
8759 ENTRY(iem.s.DataTlb.uTlbRevision),
8760 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8761 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8762 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8763 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8764 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8765 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8766 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8767 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8768 ENTRY(iem.s.DataTlb.aEntries),
8769 ENTRY(iem.s.CodeTlb.uTlbRevision),
8770 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8771 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8772 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8773 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8774 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8775 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8776 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8777 ENTRY(iem.s.CodeTlb.aEntries),
8778 ENTRY(pVMR3),
8779 ENTRY(cpum.GstCtx.rax),
8780 ENTRY(cpum.GstCtx.ah),
8781 ENTRY(cpum.GstCtx.rcx),
8782 ENTRY(cpum.GstCtx.ch),
8783 ENTRY(cpum.GstCtx.rdx),
8784 ENTRY(cpum.GstCtx.dh),
8785 ENTRY(cpum.GstCtx.rbx),
8786 ENTRY(cpum.GstCtx.bh),
8787 ENTRY(cpum.GstCtx.rsp),
8788 ENTRY(cpum.GstCtx.rbp),
8789 ENTRY(cpum.GstCtx.rsi),
8790 ENTRY(cpum.GstCtx.rdi),
8791 ENTRY(cpum.GstCtx.r8),
8792 ENTRY(cpum.GstCtx.r9),
8793 ENTRY(cpum.GstCtx.r10),
8794 ENTRY(cpum.GstCtx.r11),
8795 ENTRY(cpum.GstCtx.r12),
8796 ENTRY(cpum.GstCtx.r13),
8797 ENTRY(cpum.GstCtx.r14),
8798 ENTRY(cpum.GstCtx.r15),
8799 ENTRY(cpum.GstCtx.es.Sel),
8800 ENTRY(cpum.GstCtx.es.u64Base),
8801 ENTRY(cpum.GstCtx.es.u32Limit),
8802 ENTRY(cpum.GstCtx.es.Attr),
8803 ENTRY(cpum.GstCtx.cs.Sel),
8804 ENTRY(cpum.GstCtx.cs.u64Base),
8805 ENTRY(cpum.GstCtx.cs.u32Limit),
8806 ENTRY(cpum.GstCtx.cs.Attr),
8807 ENTRY(cpum.GstCtx.ss.Sel),
8808 ENTRY(cpum.GstCtx.ss.u64Base),
8809 ENTRY(cpum.GstCtx.ss.u32Limit),
8810 ENTRY(cpum.GstCtx.ss.Attr),
8811 ENTRY(cpum.GstCtx.ds.Sel),
8812 ENTRY(cpum.GstCtx.ds.u64Base),
8813 ENTRY(cpum.GstCtx.ds.u32Limit),
8814 ENTRY(cpum.GstCtx.ds.Attr),
8815 ENTRY(cpum.GstCtx.fs.Sel),
8816 ENTRY(cpum.GstCtx.fs.u64Base),
8817 ENTRY(cpum.GstCtx.fs.u32Limit),
8818 ENTRY(cpum.GstCtx.fs.Attr),
8819 ENTRY(cpum.GstCtx.gs.Sel),
8820 ENTRY(cpum.GstCtx.gs.u64Base),
8821 ENTRY(cpum.GstCtx.gs.u32Limit),
8822 ENTRY(cpum.GstCtx.gs.Attr),
8823 ENTRY(cpum.GstCtx.rip),
8824 ENTRY(cpum.GstCtx.eflags),
8825 ENTRY(cpum.GstCtx.uRipInhibitInt),
8826 ENTRY(cpum.GstCtx.cr0),
8827 ENTRY(cpum.GstCtx.cr4),
8828 ENTRY(cpum.GstCtx.aXcr[0]),
8829 ENTRY(cpum.GstCtx.aXcr[1]),
8830#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8831 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8832 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8833 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8834 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8835 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8836 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8837 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8838 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8839 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8840 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8841 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8842 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8843 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8844 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8845 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8846 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8847 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8848 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8849 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8850 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8851 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8852 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8853 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8854 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8855 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8856 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8857 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8858 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8859 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8860 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8861 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8862 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8863 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8864#endif
8865#undef ENTRY
8866 };
8867#ifdef VBOX_STRICT
8868 static bool s_fOrderChecked = false;
8869 if (!s_fOrderChecked)
8870 {
8871 s_fOrderChecked = true;
8872 uint32_t offPrev = s_aMembers[0].off;
8873 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8874 {
8875 Assert(s_aMembers[i].off > offPrev);
8876 offPrev = s_aMembers[i].off;
8877 }
8878 }
8879#endif
8880
8881 /*
8882 * Binary lookup.
8883 */
8884 unsigned iStart = 0;
8885 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8886 for (;;)
8887 {
8888 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8889 uint32_t const offCur = s_aMembers[iCur].off;
8890 if (off < offCur)
8891 {
8892 if (iCur != iStart)
8893 iEnd = iCur;
8894 else
8895 break;
8896 }
8897 else if (off > offCur)
8898 {
8899 if (iCur + 1 < iEnd)
8900 iStart = iCur + 1;
8901 else
8902 break;
8903 }
8904 else
8905 return s_aMembers[iCur].pszName;
8906 }
8907#ifdef VBOX_WITH_STATISTICS
8908 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8909 return "iem.s.acThreadedFuncStats[iFn]";
8910#endif
8911 return NULL;
8912}
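/*
 * Illustrative sketch only: expected behaviour of the lookup above for a
 * member that is present in the table and for an offset that is not.  The
 * function name is made up; the expectations follow directly from the table
 * and the fallback paths above.
 */
#if 0 /* illustrative sketch only */
static void iemNativeSketchVCpuOffsetToNameUsage(void)
{
    /* A member listed in s_aMembers resolves to its name... */
    Assert(!strcmp(iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip)), "cpum.GstCtx.rip"));
    /* ...while an offset outside the table (and the threaded function stats) yields NULL. */
    Assert(iemNativeDbgVCpuOffsetToName(UINT32_MAX) == NULL);
}
#endif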
8913
8914
8915/**
8916 * Translates a label to a name.
8917 */
8918static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode /*= false*/)
8919{
8920 switch (enmLabel)
8921 {
8922#define STR_CASE_CMN(a_Label) case kIemNativeLabelType_ ## a_Label: return fCommonCode ? "Chunk_" #a_Label : #a_Label;
8923 STR_CASE_CMN(Invalid);
8924 STR_CASE_CMN(RaiseDe);
8925 STR_CASE_CMN(RaiseUd);
8926 STR_CASE_CMN(RaiseSseRelated);
8927 STR_CASE_CMN(RaiseAvxRelated);
8928 STR_CASE_CMN(RaiseSseAvxFpRelated);
8929 STR_CASE_CMN(RaiseNm);
8930 STR_CASE_CMN(RaiseGp0);
8931 STR_CASE_CMN(RaiseMf);
8932 STR_CASE_CMN(RaiseXf);
8933 STR_CASE_CMN(ObsoleteTb);
8934 STR_CASE_CMN(NeedCsLimChecking);
8935 STR_CASE_CMN(CheckBranchMiss);
8936 STR_CASE_CMN(Return);
8937 STR_CASE_CMN(ReturnBreak);
8938 STR_CASE_CMN(ReturnBreakFF);
8939 STR_CASE_CMN(ReturnWithFlags);
8940 STR_CASE_CMN(ReturnBreakViaLookup);
8941 STR_CASE_CMN(ReturnBreakViaLookupWithIrq);
8942 STR_CASE_CMN(ReturnBreakViaLookupWithTlb);
8943 STR_CASE_CMN(ReturnBreakViaLookupWithTlbAndIrq);
8944 STR_CASE_CMN(NonZeroRetOrPassUp);
8945#undef STR_CASE_CMN
8946#define STR_CASE_LBL(a_Label) case kIemNativeLabelType_ ## a_Label: return #a_Label;
8947 STR_CASE_LBL(If);
8948 STR_CASE_LBL(Else);
8949 STR_CASE_LBL(Endif);
8950 STR_CASE_LBL(CheckIrq);
8951 STR_CASE_LBL(TlbLookup);
8952 STR_CASE_LBL(TlbMiss);
8953 STR_CASE_LBL(TlbDone);
8954 case kIemNativeLabelType_End: break;
8955 }
8956 return NULL;
8957}
8958
8959
8960/** Info for the symbols resolver used when disassembling. */
8961typedef struct IEMNATIVDISASMSYMCTX
8962{
8963 PVMCPU pVCpu;
8964# ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8965 PCIEMNATIVEPERCHUNKCTX pCtx;
8966# endif
8967} IEMNATIVDISASMSYMCTX;
8968typedef IEMNATIVDISASMSYMCTX *PIEMNATIVDISASMSYMCTX;
8969
8970
8971/**
8972 * Resolve address to symbol, if we can.
8973 */
8974static const char *iemNativeDisasmGetSymbol(PIEMNATIVDISASMSYMCTX pSymCtx, uintptr_t uAddress)
8975{
8976#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8977 PCIEMNATIVEPERCHUNKCTX pChunkCtx = pSymCtx->pCtx;
8978 if (pChunkCtx)
8979 for (uint32_t i = 1; i < RT_ELEMENTS(pChunkCtx->apExitLabels); i++)
8980 if ((PIEMNATIVEINSTR)uAddress == pChunkCtx->apExitLabels[i])
8981 return iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true /*fCommonCode*/);
8982#endif
8983 return NULL;
8984}
8985
8986#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8987
8988/**
8989 * @callback_method_impl{FNDISGETSYMBOL}
8990 */
8991static DECLCALLBACK(int) iemNativeDisasmGetSymbolCb(PCDISSTATE pDis, uint32_t u32Sel, RTUINTPTR uAddress,
8992 char *pszBuf, size_t cchBuf, RTINTPTR *poff, void *pvUser)
8993{
8994 const char * const pszSym = iemNativeDisasmGetSymbol((PIEMNATIVDISASMSYMCTX)pvUser, uAddress);
8995 if (pszSym)
8996 {
8997 *poff = 0;
8998 return RTStrCopy(pszBuf, cchBuf, pszSym);
8999 }
9000 RT_NOREF(pDis, u32Sel);
9001 return VERR_SYMBOL_NOT_FOUND;
9002}
9003
9004#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9005
9006/**
9007 * Annotates an instruction decoded by the capstone disassembler.
9008 */
9009static const char *
9010iemNativeDisasmAnnotateCapstone(PIEMNATIVDISASMSYMCTX pSymCtx, cs_insn const *pInstr, char *pszBuf, size_t cchBuf)
9011{
9012# if defined(RT_ARCH_ARM64)
9013 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9014 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9015 {
9016 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9017 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9018 char const *psz = strchr(pInstr->op_str, '[');
9019 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9020 {
9021 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9022 int32_t off = -1;
9023 psz += 4;
9024 if (*psz == ']')
9025 off = 0;
9026 else if (*psz == ',')
9027 {
9028 psz = RTStrStripL(psz + 1);
9029 if (*psz == '#')
9030 off = RTStrToInt32(&psz[1]);
9031 /** @todo deal with index registers and LSL as well... */
9032 }
9033 if (off >= 0)
9034 return iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9035 }
9036 }
9037 else if (pInstr->id == ARM64_INS_B || pInstr->id == ARM64_INS_BL)
9038 {
9039 const char *pszAddr = strchr(pInstr->op_str, '#');
9040 if (pszAddr)
9041 {
9042 uint64_t uAddr = RTStrToUInt64(pszAddr + 1);
9043 if (uAddr != 0)
9044 return iemNativeDisasmGetSymbol(pSymCtx, uAddr);
9045 }
9046 }
9047# endif
9048 RT_NOREF(pszBuf, cchBuf);
9049 return NULL;
9050}
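/*
 * Illustrative sketch only: a stand-alone restatement of the operand-string
 * parsing done above for inputs like "[x28, #0x5a0]".  It deliberately
 * ignores index registers and shifts, just like the code it mirrors; the
 * function name is made up for the example.
 */
# if 0 /* illustrative sketch only */
static int32_t iemNativeSketchParseVCpuDisp(const char *pszOpStr)
{
    const char *psz = strchr(pszOpStr, '[');
    if (!psz || psz[1] != 'x' || psz[2] != '2' || (psz[3] != '7' && psz[3] != '8'))
        return -1;
    psz += 4;
    if (*psz == ']')
        return 0;                         /* no displacement */
    if (*psz == ',')
    {
        psz = RTStrStripL(psz + 1);
        if (*psz == '#')
            return RTStrToInt32(&psz[1]); /* same conversion as the code above */
    }
    return -1;
}
# endif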
9051#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9052
9053
9054DECLHIDDEN(void) iemNativeDisassembleTb(PVMCPU pVCpu, PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9055{
9056 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9057#if defined(RT_ARCH_AMD64)
9058 static const char * const a_apszMarkers[] =
9059 {
9060 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9061 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9062 };
9063#endif
9064
9065 char szDisBuf[512];
9066 DISSTATE Dis;
9067 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9068 uint32_t const cNative = pTb->Native.cInstructions;
9069 uint32_t offNative = 0;
9070#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9071 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9072#endif
9073 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9074 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9075 : DISCPUMODE_64BIT;
9076#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9077 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, iemExecMemGetTbChunkCtx(pVCpu, pTb) };
9078#else
9079 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu };
9080#endif
9081#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9082 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9083#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9084 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9085#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9086# error "Port me"
9087#else
9088 csh hDisasm = ~(size_t)0;
9089# if defined(RT_ARCH_AMD64)
9090 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9091# elif defined(RT_ARCH_ARM64)
9092 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9093# else
9094# error "Port me"
9095# endif
9096 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9097
9098 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9099 //Assert(rcCs == CS_ERR_OK);
9100#endif
9101
9102 /*
9103 * Print TB info.
9104 */
9105 pHlp->pfnPrintf(pHlp,
9106 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
9107 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9108 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9109 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9110#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9111 if (pDbgInfo && pDbgInfo->cEntries > 1)
9112 {
9113 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9114
9115 /*
9116 * This disassembly is driven by the debug info which follows the native
9117 * code and indicates where the next guest instruction starts, where
9118 * labels are, and similar things.
9119 */
9120 uint32_t idxThreadedCall = 0;
9121 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9122 uint8_t idxRange = UINT8_MAX;
9123 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9124 uint32_t offRange = 0;
9125 uint32_t offOpcodes = 0;
9126 uint32_t const cbOpcodes = pTb->cbOpcodes;
9127 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9128 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9129 uint32_t iDbgEntry = 1;
9130 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9131
9132 while (offNative < cNative)
9133 {
9134 /* If we're at or have passed the point where the next chunk of debug
9135 info starts, process it. */
9136 if (offDbgNativeNext <= offNative)
9137 {
9138 offDbgNativeNext = UINT32_MAX;
9139 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9140 {
9141 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9142 {
9143 case kIemTbDbgEntryType_GuestInstruction:
9144 {
9145 /* Did the exec flag change? */
9146 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9147 {
9148 pHlp->pfnPrintf(pHlp,
9149 " fExec change %#08x -> %#08x %s\n",
9150 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9151 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9152 szDisBuf, sizeof(szDisBuf)));
9153 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9154 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9155 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9156 : DISCPUMODE_64BIT;
9157 }
9158
9159 /* New opcode range? We need to fend off a spurious debug info entry here for cases
9160 where the compilation was aborted before the opcode was recorded and the actual
9161 instruction was translated to a threaded call. This may happen when we run out
9162 of ranges, or when some complicated interrupts/FFs are found to be pending or
9163 similar. So, we just deal with it here rather than in the compiler code as it
9164 is a lot simpler to do here. */
9165 if ( idxRange == UINT8_MAX
9166 || idxRange >= cRanges
9167 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9168 {
9169 idxRange += 1;
9170 if (idxRange < cRanges)
9171 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9172 else
9173 continue;
9174 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9175 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9176 + (pTb->aRanges[idxRange].idxPhysPage == 0
9177 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9178 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9179 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9180 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9181 pTb->aRanges[idxRange].idxPhysPage);
9182 GCPhysPc += offRange;
9183 }
9184
9185 /* Disassemble the instruction. */
9186 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9187 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9188 uint32_t cbInstr = 1;
9189 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9190 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9191 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9192 if (RT_SUCCESS(rc))
9193 {
9194 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9195 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9196 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9197 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9198
9199 static unsigned const s_offMarker = 55;
9200 static char const s_szMarker[] = " ; <--- guest";
9201 if (cch < s_offMarker)
9202 {
9203 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9204 cch = s_offMarker;
9205 }
9206 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9207 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9208
9209 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9210 }
9211 else
9212 {
9213 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9214 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9215 cbInstr = 1;
9216 }
9217 GCPhysPc += cbInstr;
9218 offOpcodes += cbInstr;
9219 offRange += cbInstr;
9220 continue;
9221 }
9222
9223 case kIemTbDbgEntryType_ThreadedCall:
9224 pHlp->pfnPrintf(pHlp,
9225 " Call #%u to %s (%u args) - %s\n",
9226 idxThreadedCall,
9227 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9228 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9229 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9230 idxThreadedCall++;
9231 continue;
9232
9233 case kIemTbDbgEntryType_GuestRegShadowing:
9234 {
9235 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9236 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9237 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9238 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9239 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9240 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9241 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9242 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9243 else
9244 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9245 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9246 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9247 continue;
9248 }
9249
9250#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9251 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9252 {
9253 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9254 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9255 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9256 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9257 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9258 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9259 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9260 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9261 else
9262 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9263 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9264 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9265 continue;
9266 }
9267#endif
9268
9269 case kIemTbDbgEntryType_Label:
9270 {
9271 const char *pszName = iemNativeGetLabelName((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel);
9272 if (pDbgInfo->aEntries[iDbgEntry].Label.enmLabel >= kIemNativeLabelType_FirstWithMultipleInstances)
9273 {
9274 const char *pszComment = pDbgInfo->aEntries[iDbgEntry].Label.enmLabel == kIemNativeLabelType_Else
9275 ? " ; regs state restored pre-if-block" : "";
9276 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9277 }
9278 else
9279 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9280 continue;
9281 }
9282
9283 case kIemTbDbgEntryType_NativeOffset:
9284 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9285 Assert(offDbgNativeNext >= offNative);
9286 break;
9287
9288#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9289 case kIemTbDbgEntryType_DelayedPcUpdate:
9290 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9291 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9292 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9293 continue;
9294#endif
9295
9296#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9297 case kIemTbDbgEntryType_GuestRegDirty:
9298 {
9299 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9300 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9301 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9302 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9303 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9304 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9305 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9306 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9307 pszGstReg, pszHstReg);
9308 continue;
9309 }
9310
9311 case kIemTbDbgEntryType_GuestRegWriteback:
9312 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX64)\n",
9313 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9314 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9315 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9316 continue;
9317#endif
9318
9319 default:
9320 AssertFailed();
9321 }
9322 iDbgEntry++;
9323 break;
9324 }
9325 }
9326
9327 /*
9328 * Disassemble the next native instruction.
9329 */
9330 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9331# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9332 uint32_t cbInstr = sizeof(paNative[0]);
9333 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9334 if (RT_SUCCESS(rc))
9335 {
9336# if defined(RT_ARCH_AMD64)
9337 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9338 {
9339 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9340 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9341 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9342 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9343 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9344 uInfo & 0x8000 ? "recompiled" : "todo");
9345 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9346 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9347 else
9348 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9349 }
9350 else
9351# endif
9352 {
9353 const char *pszAnnotation = NULL;
9354# ifdef RT_ARCH_AMD64
9355 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9356 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9357 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9358 iemNativeDisasmGetSymbolCb, &SymCtx);
9359 PCDISOPPARAM pMemOp;
9360 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
9361 pMemOp = &Dis.Param1;
9362 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
9363 pMemOp = &Dis.Param2;
9364 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
9365 pMemOp = &Dis.Param3;
9366 else
9367 pMemOp = NULL;
9368 if ( pMemOp
9369 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9370 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9371 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9372 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9373
9374#elif defined(RT_ARCH_ARM64)
9375 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9376 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9377 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9378# else
9379# error "Port me"
9380# endif
9381 if (pszAnnotation)
9382 {
9383 static unsigned const s_offAnnotation = 55;
9384 size_t const cchAnnotation = strlen(pszAnnotation);
9385 size_t cchDis = strlen(szDisBuf);
9386 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9387 {
9388 if (cchDis < s_offAnnotation)
9389 {
9390 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9391 cchDis = s_offAnnotation;
9392 }
9393 szDisBuf[cchDis++] = ' ';
9394 szDisBuf[cchDis++] = ';';
9395 szDisBuf[cchDis++] = ' ';
9396 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9397 }
9398 }
9399 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9400 }
9401 }
9402 else
9403 {
9404# if defined(RT_ARCH_AMD64)
9405 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9406 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9407# elif defined(RT_ARCH_ARM64)
9408 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9409# else
9410# error "Port me"
9411# endif
9412 cbInstr = sizeof(paNative[0]);
9413 }
9414 offNative += cbInstr / sizeof(paNative[0]);
9415
9416# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9417 cs_insn *pInstr;
9418 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9419 (uintptr_t)pNativeCur, 1, &pInstr);
9420 if (cInstrs > 0)
9421 {
9422 Assert(cInstrs == 1);
9423 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9424 size_t const cchOp = strlen(pInstr->op_str);
9425# if defined(RT_ARCH_AMD64)
9426 if (pszAnnotation)
9427 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9428 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9429 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9430 else
9431 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9432 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9433
9434# else
9435 if (pszAnnotation)
9436 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9437 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9438 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9439 else
9440 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9441 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9442# endif
9443 offNative += pInstr->size / sizeof(*pNativeCur);
9444 cs_free(pInstr, cInstrs);
9445 }
9446 else
9447 {
9448# if defined(RT_ARCH_AMD64)
9449 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9450 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9451# else
9452 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9453# endif
9454 offNative++;
9455 }
9456# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9457 }
9458 }
9459 else
9460#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9461 {
9462 /*
9463 * No debug info, just disassemble the x86 code and then the native code.
9464 *
9465 * First the guest code:
9466 */
9467 for (unsigned i = 0; i < pTb->cRanges; i++)
9468 {
9469 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9470 + (pTb->aRanges[i].idxPhysPage == 0
9471 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9472 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9473 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9474 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9475 unsigned off = pTb->aRanges[i].offOpcodes;
9476 /** @todo this ain't working when crossing pages! */
9477 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9478 while (off < cbOpcodes)
9479 {
9480 uint32_t cbInstr = 1;
9481 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9482 &pTb->pabOpcodes[off], cbOpcodes - off,
9483 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9484 if (RT_SUCCESS(rc))
9485 {
9486 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9487 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9488 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9489 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9490 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9491 GCPhysPc += cbInstr;
9492 off += cbInstr;
9493 }
9494 else
9495 {
9496 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9497 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9498 break;
9499 }
9500 }
9501 }
9502
9503 /*
9504 * Then the native code:
9505 */
9506 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9507 while (offNative < cNative)
9508 {
9509 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9510# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9511 uint32_t cbInstr = sizeof(paNative[0]);
9512 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9513 if (RT_SUCCESS(rc))
9514 {
9515# if defined(RT_ARCH_AMD64)
9516 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9517 {
9518 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9519 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9520 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9521 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9522 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9523 uInfo & 0x8000 ? "recompiled" : "todo");
9524 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9525 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9526 else
9527 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9528 }
9529 else
9530# endif
9531 {
9532# ifdef RT_ARCH_AMD64
9533 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9534 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9535 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9536 iemNativeDisasmGetSymbolCb, &SymCtx);
9537# elif defined(RT_ARCH_ARM64)
9538 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9539 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9540 iemNativeDisasmGetSymbolCb, &SymCtx);
9541# else
9542# error "Port me"
9543# endif
9544 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9545 }
9546 }
9547 else
9548 {
9549# if defined(RT_ARCH_AMD64)
9550 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9551 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9552# else
9553 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9554# endif
9555 cbInstr = sizeof(paNative[0]);
9556 }
9557 offNative += cbInstr / sizeof(paNative[0]);
9558
9559# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9560 cs_insn *pInstr;
9561 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9562 (uintptr_t)pNativeCur, 1, &pInstr);
9563 if (cInstrs > 0)
9564 {
9565 Assert(cInstrs == 1);
9566 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9567 size_t const cchOp = strlen(pInstr->op_str);
9568# if defined(RT_ARCH_AMD64)
9569 if (pszAnnotation)
9570 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9571 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9572 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9573 else
9574 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9575 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9576
9577# else
9578 if (pszAnnotation)
9579 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9580 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9581 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9582 else
9583 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9584 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9585# endif
9586 offNative += pInstr->size / sizeof(*pNativeCur);
9587 cs_free(pInstr, cInstrs);
9588 }
9589 else
9590 {
9591# if defined(RT_ARCH_AMD64)
9592 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9593 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9594# else
9595 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9596# endif
9597 offNative++;
9598 }
9599# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9600 }
9601 }
9602
9603#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9604 /* Cleanup. */
9605 cs_close(&hDisasm);
9606#endif
9607}
9608
9609
9610#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9611
9612/** Emit alignment padding between labels / functions. */
9613DECL_INLINE_THROW(uint32_t)
9614iemNativeRecompileEmitAlignmentPadding(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fAlignMask)
9615{
9616 if (off & fAlignMask)
9617 {
9618 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, fAlignMask + 1);
9619 while (off & fAlignMask)
9620# if defined(RT_ARCH_AMD64)
9621 pCodeBuf[off++] = 0xcc;
9622# elif defined(RT_ARCH_ARM64)
9623 pCodeBuf[off++] = Armv8A64MkInstrBrk(0xcccc);
9624# else
9625# error "port me"
9626# endif
9627 }
9628 return off;
9629}
9630
9631
9632/**
9633 * Called when a new chunk is allocated to emit common per-chunk code.
9634 *
9635 * Allocates a per-chunk context directly from the chunk itself and places the
9636 * common code there.
9637 *
9638 * @returns Pointer to the chunk context start.
9639 * @param pVCpu The cross context virtual CPU structure of the calling
9640 * thread.
9641 * @param idxChunk The index of the chunk being added and requiring a
9642 * common code context.
9643 */
9644DECLHIDDEN(PCIEMNATIVEPERCHUNKCTX) iemNativeRecompileAttachExecMemChunkCtx(PVMCPU pVCpu, uint32_t idxChunk)
9645{
9646 /*
9647 * Allocate a new recompiler state (since we're likely to be called while
9648 * the default one is fully loaded already with a recompiled TB).
9649 *
9650 * This is a bit of overkill, but this isn't a frequently used code path.
9651 */
9652 PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, NULL);
9653 AssertReturn(pReNative, NULL);
9654
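      /* The alignment mask is in IEMNATIVEINSTR units: bytes on AMD64 (pad to 16 bytes with int3),
         32-bit instruction words on ARM64 (31 / 4 == 7, i.e. pad to 32 bytes with BRK). */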
9655# if defined(RT_ARCH_AMD64)
9656 uint32_t const fAlignMask = 15;
9657# elif defined(RT_ARCH_ARM64)
9658 uint32_t const fAlignMask = 31 / 4;
9659# else
9660# error "port me"
9661# endif
9662 uint32_t aoffLabels[kIemNativeExitReason_Max] = {0};
9663 int rc = VINF_SUCCESS;
9664 uint32_t off = 0;
9665
9666 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9667 {
9668 /*
9669 * Emit the epilog code.
9670 */
9671 aoffLabels[kIemNativeExitReason_Return] = off;
9672 off = iemNativeEmitCoreEpilog(pReNative, off);
9673
9674 /*
9675 * Generate special jump labels. All of these get a copy of the epilog code.
9676 */
9677 static struct
9678 {
9679 IEMNATIVEEXITREASON enmExitReason;
9680 uint32_t (*pfnEmitCore)(PIEMRECOMPILERSTATE pReNative, uint32_t off);
9681 } const s_aSpecialWithEpilogs[] =
9682 {
9683 { kIemNativeExitReason_NonZeroRetOrPassUp, iemNativeEmitCoreRcFiddling },
9684 { kIemNativeExitReason_ReturnBreak, iemNativeEmitCoreReturnBreak },
9685 { kIemNativeExitReason_ReturnBreakFF, iemNativeEmitCoreReturnBreakFF },
9686 { kIemNativeExitReason_ReturnWithFlags, iemNativeEmitCoreReturnWithFlags },
9687 };
9688 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSpecialWithEpilogs); i++)
9689 {
9690 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9691 Assert(aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] == 0);
9692 aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] = off;
9693 off = s_aSpecialWithEpilogs[i].pfnEmitCore(pReNative, off);
9694 off = iemNativeEmitCoreEpilog(pReNative, off);
9695 }
9696
9697 /*
9698 * Do what iemNativeEmitReturnBreakViaLookup does.
9699 */
9700 static struct
9701 {
9702 IEMNATIVEEXITREASON enmExitReason;
9703 uintptr_t pfnHelper;
9704 } const s_aViaLookup[] =
9705 {
9706 { kIemNativeExitReason_ReturnBreakViaLookup,
9707 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/> },
9708 { kIemNativeExitReason_ReturnBreakViaLookupWithIrq,
9709 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/> },
9710 { kIemNativeExitReason_ReturnBreakViaLookupWithTlb,
9711 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/> },
9712 { kIemNativeExitReason_ReturnBreakViaLookupWithTlbAndIrq,
9713 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/> },
9714 };
9715 uint32_t const offReturnBreak = aoffLabels[kIemNativeExitReason_ReturnBreak]; Assert(offReturnBreak != 0);
9716 for (uint32_t i = 0; i < RT_ELEMENTS(s_aViaLookup); i++)
9717 {
9718 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9719 Assert(aoffLabels[s_aViaLookup[i].enmExitReason] == 0);
9720 aoffLabels[s_aViaLookup[i].enmExitReason] = off;
9721 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, s_aViaLookup[i].pfnHelper);
9722 }
9723
9724 /*
9725 * Generate simple TB tail labels that just call a helper with a pVCpu
9726 * arg and either return or longjmp/throw a non-zero status.
9727 */
9728 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9729 static struct
9730 {
9731 IEMNATIVEEXITREASON enmExitReason;
9732 bool fWithEpilog;
9733 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9734 } const s_aSimpleTailLabels[] =
9735 {
9736 { kIemNativeExitReason_RaiseDe, false, iemNativeHlpExecRaiseDe },
9737 { kIemNativeExitReason_RaiseUd, false, iemNativeHlpExecRaiseUd },
9738 { kIemNativeExitReason_RaiseSseRelated, false, iemNativeHlpExecRaiseSseRelated },
9739 { kIemNativeExitReason_RaiseAvxRelated, false, iemNativeHlpExecRaiseAvxRelated },
9740 { kIemNativeExitReason_RaiseSseAvxFpRelated, false, iemNativeHlpExecRaiseSseAvxFpRelated },
9741 { kIemNativeExitReason_RaiseNm, false, iemNativeHlpExecRaiseNm },
9742 { kIemNativeExitReason_RaiseGp0, false, iemNativeHlpExecRaiseGp0 },
9743 { kIemNativeExitReason_RaiseMf, false, iemNativeHlpExecRaiseMf },
9744 { kIemNativeExitReason_RaiseXf, false, iemNativeHlpExecRaiseXf },
9745 { kIemNativeExitReason_ObsoleteTb, true, iemNativeHlpObsoleteTb },
9746 { kIemNativeExitReason_NeedCsLimChecking, true, iemNativeHlpNeedCsLimChecking },
9747 { kIemNativeExitReason_CheckBranchMiss, true, iemNativeHlpCheckBranchMiss },
9748 };
9749 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSimpleTailLabels); i++)
9750 {
9751 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9752 Assert(!aoffLabels[s_aSimpleTailLabels[i].enmExitReason]);
9753 aoffLabels[s_aSimpleTailLabels[i].enmExitReason] = off;
9754
9755 /* int pfnCallback(PVMCPUCC pVCpu) */
9756 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9757 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)s_aSimpleTailLabels[i].pfnCallback);
9758
9759 /* jump back to the return sequence / generate a return sequence. */
9760 if (!s_aSimpleTailLabels[i].fWithEpilog)
9761 off = iemNativeEmitJmpToFixed(pReNative, off, aoffLabels[kIemNativeExitReason_Return]);
9762 else
9763 off = iemNativeEmitCoreEpilog(pReNative, off);
9764 }
9765
9766
9767# ifdef VBOX_STRICT
9768 /* Make sure we've generated code for all labels. */
9769 for (uint32_t i = kIemNativeExitReason_Invalid + 1; i < RT_ELEMENTS(aoffLabels); i++)
9770 Assert(aoffLabels[i] != 0 || i == kIemNativeExitReason_Return);
9771# endif
9772 }
9773 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9774 {
9775 Log(("iemNativeRecompileAttachExecMemChunkCtx: Caught %Rrc while recompiling!\n", rc));
9776 iemNativeTerm(pReNative);
9777 return NULL;
9778 }
9779 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9780
9781 /*
9782 * Allocate memory for the context (first) and the common code (last).
9783 */
9784 PIEMNATIVEPERCHUNKCTX pCtx;
9785 uint32_t const cbCtx = RT_ALIGN_32(sizeof(*pCtx), 64);
9786 uint32_t const cbCode = off * sizeof(IEMNATIVEINSTR);
9787 PIEMNATIVEINSTR paFinalCommonCodeRx = NULL;
9788 pCtx = (PIEMNATIVEPERCHUNKCTX)iemExecMemAllocatorAllocFromChunk(pVCpu, idxChunk, cbCtx + cbCode, &paFinalCommonCodeRx);
9789 AssertLogRelMsgReturn(pCtx, ("cbCtx=%#x cbCode=%#x idxChunk=%#x\n", cbCtx, cbCode, idxChunk), NULL);
9790
9791 /*
9792 * Copy over the generated code.
9793 * There should be no fixups or labels defined here.
9794 */
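      /* Advance the RX pointer past the context area as well: the code is copied in via the
         writable pCtx mapping, while the exit labels below must point at the executable (RX)
         addresses. */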
9795 paFinalCommonCodeRx = (PIEMNATIVEINSTR)((uintptr_t)paFinalCommonCodeRx + cbCtx);
9796 memcpy((PIEMNATIVEINSTR)((uintptr_t)pCtx + cbCtx), pReNative->pInstrBuf, cbCode);
9797
9798 Assert(pReNative->cFixups == 0);
9799 Assert(pReNative->cLabels == 0);
9800
9801 /*
9802 * Initialize the context.
9803 */
9804 AssertCompile(kIemNativeExitReason_Invalid == 0);
9805 AssertCompile(RT_ELEMENTS(pCtx->apExitLabels) == RT_ELEMENTS(aoffLabels));
9806 pCtx->apExitLabels[kIemNativeExitReason_Invalid] = 0;
9807 for (uint32_t i = kIemNativeExitReason_Invalid + 1; i < RT_ELEMENTS(pCtx->apExitLabels); i++)
9808 {
9809 Assert(aoffLabels[i] != 0 || i == kIemNativeExitReason_Return);
9810 pCtx->apExitLabels[i] = &paFinalCommonCodeRx[aoffLabels[i]];
9811 Log10((" apExitLabels[%u]=%p %s\n", i, pCtx->apExitLabels[i], iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true)));
9812 }
9813
9814 iemExecMemAllocatorReadyForUse(pVCpu, pCtx, cbCtx + cbCode);
9815
9816 iemNativeTerm(pReNative);
9817 return pCtx;
9818}
9819
9820#endif /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
9821
9822/**
9823 * Recompiles the given threaded TB into a native one.
9824 *
9825 * In case of failure the translation block will be returned as-is.
9826 *
9827 * @returns pTb.
9828 * @param pVCpu The cross context virtual CPU structure of the calling
9829 * thread.
9830 * @param pTb The threaded translation block to recompile to native.
9831 */
9832DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9833{
9834#if 0 /* For profiling the native recompiler code. */
9835l_profile_again:
9836#endif
9837 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9838
9839 /*
9840 * The first time thru, we allocate the recompiler state and save it;
9841 * all the other times we'll just reuse the saved one after a quick reset.
9842 */
9843 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9844 if (RT_LIKELY(pReNative))
9845 iemNativeReInit(pReNative, pTb);
9846 else
9847 {
9848 pReNative = iemNativeInit(pVCpu, pTb);
9849 AssertReturn(pReNative, pTb);
9850 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative; /* save it */
9851 }
9852
9853#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9854 /*
9855 * First do liveness analysis. This is done backwards.
9856 */
9857 {
9858 uint32_t idxCall = pTb->Thrd.cCalls;
9859 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9860 { /* likely */ }
9861 else
9862 {
9863 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9864 while (idxCall > cAlloc)
9865 cAlloc *= 2;
9866 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9867 AssertReturn(pvNew, pTb);
9868 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9869 pReNative->cLivenessEntriesAlloc = cAlloc;
9870 }
9871 AssertReturn(idxCall > 0, pTb);
9872 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9873
9874 /* The initial (final) entry. */
9875 idxCall--;
9876 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9877
9878 /* Loop backwards thru the calls and fill in the other entries. */
9879 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9880 while (idxCall > 0)
9881 {
9882 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9883 if (pfnLiveness)
9884 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9885 else
9886 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9887 pCallEntry--;
9888 idxCall--;
9889 }
9890
9891# ifdef VBOX_WITH_STATISTICS
9892 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9893 to 'clobbered' rather than 'input'. */
9894 /** @todo */
9895# endif
9896 }
9897#endif
9898
9899 /*
9900 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9901 * so that we can abort if an error occurs.
9902 */
9903 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9904#ifdef LOG_ENABLED
9905 uint32_t const cCallsOrg = cCallsLeft;
9906#endif
9907 uint32_t off = 0;
9908 int rc = VINF_SUCCESS;
9909 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9910 {
9911#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
9912 /*
9913 * Emit prolog code (fixed).
9914 */
9915 off = iemNativeEmitProlog(pReNative, off);
9916#endif
9917
9918 /*
9919 * Convert the calls to native code.
9920 */
9921#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9922 int32_t iGstInstr = -1;
9923#endif
9924#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9925 uint32_t cThreadedCalls = 0;
9926 uint32_t cRecompiledCalls = 0;
9927#endif
9928#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9929 uint32_t idxCurCall = 0;
9930#endif
9931 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9932 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9933 while (cCallsLeft-- > 0)
9934 {
9935 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9936#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9937 pReNative->idxCurCall = idxCurCall;
9938#endif
9939
9940 /*
9941 * Debug info, assembly markup and statistics.
9942 */
9943#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9944 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9945 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9946#endif
9947#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9948 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9949 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9950 {
9951 if (iGstInstr < (int32_t)pTb->cInstructions)
9952 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9953 else
9954 Assert(iGstInstr == pTb->cInstructions);
9955 iGstInstr = pCallEntry->idxInstr;
9956 }
9957 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9958#endif
9959#if defined(VBOX_STRICT)
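      /* Debug marker encoding the call index (low word, bit 15 set when recompiled) and the
         threaded function number (high word); decoded during disassembly on AMD64 (see the
         OP_NOP marker handling above). */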
9960 off = iemNativeEmitMarker(pReNative, off,
9961 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9962#endif
9963#if defined(VBOX_STRICT)
9964 iemNativeRegAssertSanity(pReNative);
9965#endif
9966#ifdef VBOX_WITH_STATISTICS
9967 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9968#endif
9969
9970#if 0
9971 if ( pTb->GCPhysPc == 0x00000000000c1240
9972 && idxCurCall == 67)
9973 off = iemNativeEmitBrk(pReNative, off, 0xf000);
9974#endif
9975
9976 /*
9977 * Actual work.
9978 */
9979 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9980 pfnRecom ? "(recompiled)" : "(todo)"));
9981 if (pfnRecom) /** @todo stats on this. */
9982 {
9983 off = pfnRecom(pReNative, off, pCallEntry);
9984 STAM_REL_STATS({cRecompiledCalls++;});
9985 }
9986 else
9987 {
9988 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9989 STAM_REL_STATS({cThreadedCalls++;});
9990 }
9991 Assert(off <= pReNative->cInstrBufAlloc);
9992 Assert(pReNative->cCondDepth == 0);
9993
9994#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9995 if (LogIs2Enabled())
9996 {
9997 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9998# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9999 static const char s_achState[] = "CUXI";
10000# else
10001 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
10002# endif
10003
10004 char szGpr[17];
10005 for (unsigned i = 0; i < 16; i++)
10006 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10007 szGpr[16] = '\0';
10008
10009 char szSegBase[X86_SREG_COUNT + 1];
10010 char szSegLimit[X86_SREG_COUNT + 1];
10011 char szSegAttrib[X86_SREG_COUNT + 1];
10012 char szSegSel[X86_SREG_COUNT + 1];
10013 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10014 {
10015 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10016 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10017 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10018 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10019 }
10020 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10021 = szSegSel[X86_SREG_COUNT] = '\0';
10022
10023 char szEFlags[8];
10024 for (unsigned i = 0; i < 7; i++)
10025 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10026 szEFlags[7] = '\0';
10027
10028 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10029 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10030 }
10031#endif
10032
10033 /*
10034 * Advance.
10035 */
10036 pCallEntry++;
10037#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
10038 idxCurCall++;
10039#endif
10040 }
10041
10042 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10043 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10044 if (!cThreadedCalls)
10045 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10046
10047#ifdef VBOX_WITH_STATISTICS
10048 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
10049#endif
10050
10051 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
10052 off = iemNativeRegFlushPendingWrites(pReNative, off);
10053
10054 /*
10055 * Successful return, so clear the return register (eax, w0).
10056 */
10057 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
10058
10059#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10060 /*
10061 * Emit the epilog code.
10062 */
10063 uint32_t idxReturnLabel;
10064 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
10065#else
10066 /*
10067 * Jump to the common per-chunk epilog code.
10068 */
10069 //off = iemNativeEmitBrk(pReNative, off, 0x1227);
10070 off = iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_Return);
10071#endif
10072
10073#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10074 /*
10075 * Generate special jump labels.
10076 */
10077 off = iemNativeEmitRcFiddling(pReNative, off, idxReturnLabel);
10078
10079 bool const fReturnBreakViaLookup = RT_BOOL( pReNative->bmLabelTypes
10080 & ( RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookup)
10081 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithIrq)
10082 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlb)
10083 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq)));
10084 if (fReturnBreakViaLookup)
10085 {
10086 uint32_t const idxReturnBreakLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
10087 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10088 off = iemNativeEmitReturnBreakViaLookup(pReNative, off, idxReturnBreakLabel);
10089 }
10090 else if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
10091 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10092
10093 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreakFF))
10094 off = iemNativeEmitReturnBreakFF(pReNative, off, idxReturnLabel);
10095
10096 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
10097 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
10098
10099 /*
10100 * Generate simple TB tail labels that just call a helper with a pVCpu
10101 * arg and either return or longjmp/throw a non-zero status.
10102 *
10103 * The array entries must be ordered by enmLabel value so we can index
10104 * using fTailLabels bit numbers.
10105 */
10106 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
10107 static struct
10108 {
10109 IEMNATIVELABELTYPE enmLabel;
10110 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
10111 } const g_aSimpleTailLabels[] =
10112 {
10113 { kIemNativeLabelType_Invalid, NULL },
10114 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
10115 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
10116 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
10117 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
10118 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
10119 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
10120 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
10121 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
10122 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
10123 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
10124 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
10125 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
10126 };
10127
10128 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
10129 AssertCompile(kIemNativeLabelType_Invalid == 0);
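      /* RT_BIT_64(LastSimple + 1) - 1 would give bits 0..LastSimple; subtracting 2 instead
         also clears bit 0 (kIemNativeLabelType_Invalid). */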
10130 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
10131 if (fTailLabels)
10132 {
10133 do
10134 {
10135 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10136 fTailLabels &= ~RT_BIT_64(enmLabel);
10137 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
10138
10139 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10140 Assert(idxLabel != UINT32_MAX);
10141 if (idxLabel != UINT32_MAX)
10142 {
10143 iemNativeLabelDefine(pReNative, idxLabel, off);
10144
10145 /* int pfnCallback(PVMCPUCC pVCpu) */
10146 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10147 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
10148
10149 /* jump back to the return sequence. */
10150 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
10151 }
10152
10153 } while (fTailLabels);
10154 }
10155
10156#else /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
10157 /*
10158 * Generate tail labels with jumps to the common per-chunk code.
10159 */
10160# ifndef RT_ARCH_AMD64
10161 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_Return) | RT_BIT_64(kIemNativeLabelType_Invalid))));
10162 AssertCompile(kIemNativeLabelType_Invalid == 0);
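      /* Bits 1..LastTbExit; the '- 2U' keeps the Invalid bit (bit 0) out of the mask, the
         same trick as in the non-per-chunk tail label code. */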
10163 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1U) - 2U);
10164 if (fTailLabels)
10165 {
10166 do
10167 {
10168 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10169 fTailLabels &= ~RT_BIT_64(enmLabel);
10170
10171 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10172 AssertContinue(idxLabel != UINT32_MAX);
10173 iemNativeLabelDefine(pReNative, idxLabel, off);
10174 off = iemNativeEmitTbExit(pReNative, off, (IEMNATIVEEXITREASON)enmLabel);
10175 } while (fTailLabels);
10176 }
10177# else
10178 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1) - 1U))); /* Should not be used! */
10179# endif
10180#endif /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
10181 }
10182 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10183 {
10184 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10185 return pTb;
10186 }
10187 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10188 Assert(off <= pReNative->cInstrBufAlloc);
10189
10190 /*
10191 * Make sure all labels have been defined.
10192 */
10193 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10194#ifdef VBOX_STRICT
10195 uint32_t const cLabels = pReNative->cLabels;
10196 for (uint32_t i = 0; i < cLabels; i++)
10197 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10198#endif
10199
10200#if 0 /* For profiling the native recompiler code. */
10201 if (pTb->Thrd.cCalls >= 136)
10202 {
10203 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10204 goto l_profile_again;
10205 }
10206#endif
10207
10208 /*
10209 * Allocate executable memory, copy over the code we've generated.
10210 */
10211 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10212 if (pTbAllocator->pDelayedFreeHead)
10213 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10214
10215 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
10216#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
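      /* The allocator also hands back the per-chunk context so the TB exit fixups below can
         resolve the addresses of the shared tail code via pCtx->apExitLabels. */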
10217 PCIEMNATIVEPERCHUNKCTX pCtx = NULL;
10218 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10219 &paFinalInstrBufRx, &pCtx);
10220
10221#else
10222 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10223 &paFinalInstrBufRx, NULL);
10224#endif
10225 AssertReturn(paFinalInstrBuf, pTb);
10226 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10227
10228 /*
10229 * Apply fixups.
10230 */
10231 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10232 uint32_t const cFixups = pReNative->cFixups;
10233 for (uint32_t i = 0; i < cFixups; i++)
10234 {
10235 Assert(paFixups[i].off < off);
10236 Assert(paFixups[i].idxLabel < cLabels);
10237 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10238 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10239 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10240 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10241 switch (paFixups[i].enmType)
10242 {
10243#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10244 case kIemNativeFixupType_Rel32:
10245 Assert(paFixups[i].off + 4 <= off);
10246 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10247 continue;
10248
10249#elif defined(RT_ARCH_ARM64)
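      /* These patch the signed immediate of ARM64 branch instructions; displacements are in
         32-bit instruction units.  Imm26At0 is the B/BL field, Imm19At5 the B.cond/CBZ/CBNZ
         field and Imm14At5 the TBZ/TBNZ field. */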
10250 case kIemNativeFixupType_RelImm26At0:
10251 {
10252 Assert(paFixups[i].off < off);
10253 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10254 Assert(offDisp >= -33554432 && offDisp < 33554432);
10255 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10256 continue;
10257 }
10258
10259 case kIemNativeFixupType_RelImm19At5:
10260 {
10261 Assert(paFixups[i].off < off);
10262 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10263 Assert(offDisp >= -262144 && offDisp < 262144);
10264 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10265 continue;
10266 }
10267
10268 case kIemNativeFixupType_RelImm14At5:
10269 {
10270 Assert(paFixups[i].off < off);
10271 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10272 Assert(offDisp >= -8192 && offDisp < 8192);
10273 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10274 continue;
10275 }
10276
10277#endif
10278 case kIemNativeFixupType_Invalid:
10279 case kIemNativeFixupType_End:
10280 break;
10281 }
10282 AssertFailed();
10283 }
10284
10285#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10286 /*
10287 * Apply TB exit fixups.
10288 */
10289 PIEMNATIVEEXITFIXUP const paTbExitFixups = pReNative->paTbExitFixups;
10290 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
10291 for (uint32_t i = 0; i < cTbExitFixups; i++)
10292 {
10293 Assert(paTbExitFixups[i].off < off);
10294 Assert( paTbExitFixups[i].enmExitReason < kIemNativeExitReason_Max
10295 && paTbExitFixups[i].enmExitReason > kIemNativeExitReason_Invalid);
10296 RTPTRUNION const Ptr = { &paFinalInstrBuf[paTbExitFixups[i].off] };
10297
10298# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
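      /* The fixup offset points at the 4-byte rel32 field of the exit jump; the displacement
         is relative to the end of that field (hence the '+ 4') in the executable (RX) mapping. */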
10299 Assert(paTbExitFixups[i].off + 4 <= off);
10300 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off + 4];
10301 Assert(offDisp >= INT32_MIN && offDisp <= INT32_MAX);
10302 *Ptr.pi32 = (int32_t)offDisp;
10303
10304# elif defined(RT_ARCH_ARM64)
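      /* Here the fixup offset points at the branch instruction itself; the displacement is in
         32-bit instruction units and goes into the low 26 bits, preserving the opcode bits. */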
10305 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off];
10306 Assert(offDisp >= -33554432 && offDisp < 33554432);
10307 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10308
10309# else
10310# error "Port me!"
10311# endif
10312 }
10313#endif
10314
10315 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
10316 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10317
10318 /*
10319 * Convert the translation block.
10320 */
10321 RTMemFree(pTb->Thrd.paCalls);
10322 pTb->Native.paInstructions = paFinalInstrBufRx;
10323 pTb->Native.cInstructions = off;
10324 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10325#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10326 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10327 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10328#endif
10329
10330 Assert(pTbAllocator->cThreadedTbs > 0);
10331 pTbAllocator->cThreadedTbs -= 1;
10332 pTbAllocator->cNativeTbs += 1;
10333 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10334
10335#ifdef LOG_ENABLED
10336 /*
10337 * Disassemble to the log if enabled.
10338 */
10339 if (LogIs3Enabled())
10340 {
10341 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10342 iemNativeDisassembleTb(pVCpu, pTb, DBGFR3InfoLogHlp());
10343# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10344 RTLogFlush(NULL);
10345# endif
10346 }
10347#endif
10348 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10349
10350 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10351 return pTb;
10352}
10353