VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@105855

Last change on this file since 105855 was 105855, checked in by vboxsync, 3 months ago

VMM/IEM: Don't update PC during IEM_MC_REL_JMP_S[8|32]_AND_FINISH if we can avoid it. This extends the offPc to 64-bit and tries to make the stats more accurate. This is more on todo 4 in bugref:10720. bugref:10373

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 459.4 KB
1/* $Id: IEMAllN8veRecompiler.cpp 105855 2024-08-23 23:12:23Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : Delayed PC updating.
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include <VBox/vmm/tm.h>
55#include "IEMInternal.h"
56#include <VBox/vmm/vmcc.h>
57#include <VBox/log.h>
58#include <VBox/err.h>
59#include <VBox/dis.h>
60#include <VBox/param.h>
61#include <iprt/assert.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
71# include "/opt/local/include/capstone/capstone.h"
72#endif
73
74#include "IEMInline.h"
75#include "IEMThreadedFunctions.h"
76#include "IEMN8veRecompiler.h"
77#include "IEMN8veRecompilerEmit.h"
78#include "IEMN8veRecompilerTlbLookup.h"
79#include "IEMNativeFunctions.h"
80
81
82/*
83 * Narrow down configs here to avoid wasting time on unused configs.
84 * Note! Same checks in IEMAllThrdRecompiler.cpp.
85 */
86
87#ifndef IEM_WITH_CODE_TLB
88# error The code TLB must be enabled for the recompiler.
89#endif
90
91#ifndef IEM_WITH_DATA_TLB
92# error The data TLB must be enabled for the recompiler.
93#endif
94
95#ifndef IEM_WITH_SETJMP
96# error The setjmp approach must be enabled for the recompiler.
97#endif
98
99/** @todo eliminate this clang build hack. */
100#if RT_CLANG_PREREQ(4, 0)
101# pragma GCC diagnostic ignored "-Wunused-function"
102#endif
103
104
105/*********************************************************************************************************************************
106* Internal Functions *
107*********************************************************************************************************************************/
108#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
109static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
110#endif
111DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
112DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
113 IEMNATIVEGSTREG enmGstReg, uint32_t off);
114DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
115static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode = false);
116
117
118
119/*********************************************************************************************************************************
120* Native Recompilation *
121*********************************************************************************************************************************/
122
123
124/**
125 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
126 */
127IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
128{
129 pVCpu->iem.s.cInstructions += idxInstr;
130 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
131}
132
133
134/**
135 * Helper for iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb.
136 */
137DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
138{
139 uint64_t fCpu = pVCpu->fLocalForcedActions;
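    /* Only consider force flags that require breaking out of TB execution;
       CR3 sync, TLB flushing and unhalt are masked out here. */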
140 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
141 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
142 | VMCPU_FF_TLB_FLUSH
143 | VMCPU_FF_UNHALT );
144 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
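    /* No exit is needed if the only pending VCPU FFs are APIC/PIC interrupts that
       cannot be delivered right now (IF clear or an interrupt shadow is active)
       and no VM-wide force flags are set. */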
145 if (RT_LIKELY( ( !fCpu
146 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
147 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
148 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
149 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
150 return false;
151 return true;
152}
153
154
155/**
156 * Used by TB code to directly link to the next TB via the TB lookup table, given the physical PC of the next instruction.
157 */
158template <bool const a_fWithIrqCheck>
159IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
160 uint32_t fFlags, RTGCPHYS GCPhysPc))
161{
162 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
163 Assert(idxTbLookup < pTb->cTbLookupEntries);
164 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
165#if 1
166 PIEMTB const pNewTb = *ppNewTb;
167 if (pNewTb)
168 {
169# ifdef VBOX_STRICT
170 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
171 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
172 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
173 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
174 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
175 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
176# endif
177 if (pNewTb->GCPhysPc == GCPhysPc)
178 {
179# ifdef VBOX_STRICT
180 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
181 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
182 fAssertFlags |= IEMTB_F_INHIBIT_SHADOW;
183 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
184 fAssertFlags |= IEMTB_F_INHIBIT_NMI;
185# if 1 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. */
186 Assert(IEM_F_MODE_X86_IS_FLAT(fFlags));
187# else
188 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
189 {
190 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
191 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
192 fAssertFlags |= IEMTB_F_CS_LIM_CHECKS;
193 }
194# endif
195 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
196 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
197 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
198#endif
199
200 /*
201 * Check them + type.
202 */
203 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
204 {
205 /*
206 * Check for interrupts and stuff.
207 */
208 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
209 * The main problems are the statistics and, to some degree, the logging. :/ */
210 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
211 {
212 /* Do polling. */
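    /* cTbsTillNextTimerPoll is a countdown: only when it reaches zero do we call
       iemPollTimers, and we keep linking if it reports nothing to do (VINF_SUCCESS). */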
213 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
214 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
215 {
216 /*
217 * Success. Update statistics and switch to the next TB.
218 */
219 if (a_fWithIrqCheck)
220 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
221 else
222 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
223
224 pNewTb->cUsed += 1;
225 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
226 pVCpu->iem.s.pCurTbR3 = pNewTb;
227 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
228 pVCpu->iem.s.cTbExecNative += 1;
229 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
230 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
231 return (uintptr_t)pNewTb->Native.paInstructions;
232 }
233 }
234 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
235 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
236 }
237 else
238 {
239 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
240 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
241 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
242 }
243 }
244 else
245 {
246 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
247 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
248 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
249 }
250 }
251 else
252 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
253#else
254 NOREF(GCPhysPc);
255#endif
256
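/* No direct linking was possible: remember the lookup table entry so it can be
   filled in later and return zero, so the caller falls back to the normal
   return-break path. */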
257 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
258 return 0;
259}
260
261
262/**
263 * Used by TB code to directly link to the next TB via the TB lookup table, doing the code TLB lookup itself to find the physical PC.
264 */
265template <bool const a_fWithIrqCheck>
266IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
267{
268 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
269 Assert(idxTbLookup < pTb->cTbLookupEntries);
270 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
271#if 1
272 PIEMTB const pNewTb = *ppNewTb;
273 if (pNewTb)
274 {
275 /*
276 * Calculate the flags for the next TB and check if they match.
277 */
278 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
279 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
280 { /* likely */ }
281 else
282 {
283 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
284 fFlags |= IEMTB_F_INHIBIT_SHADOW;
285 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
286 fFlags |= IEMTB_F_INHIBIT_NMI;
287 }
288 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
289 {
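    /* CS.LIM checks are only required when the limit lies within roughly a page
       plus the maximum instruction length of the current EIP (adjusted for the
       page offset of the CS base); otherwise the TB cannot run past the limit. */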
290 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
291 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
292 { /* likely */ }
293 else
294 fFlags |= IEMTB_F_CS_LIM_CHECKS;
295 }
296 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
297
298 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
299 {
300 /*
301 * Do the TLB lookup for flat RIP and compare the result with the next TB.
302 *
303 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
304 */
305 /* Calc the effective PC. */
306 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
307 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
308 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
309
310 /* Advance within the current buffer (PAGE) when possible. */
311 RTGCPHYS GCPhysPc;
312 uint64_t off;
313 if ( pVCpu->iem.s.pbInstrBuf
314 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
315 {
316 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
317 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
318 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
319 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
320 else
321 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
322 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
323 }
324 else
325 {
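    /* Outside the current instruction buffer: reset it and have
       iemOpcodeFetchBytesJmp re-establish it (this may longjmp on failure), then
       derive the physical PC from the refreshed buffer, if any. */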
326 pVCpu->iem.s.pbInstrBuf = NULL;
327 pVCpu->iem.s.offCurInstrStart = 0;
328 pVCpu->iem.s.offInstrNextByte = 0;
329 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
330 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
331 }
332
333 if (pNewTb->GCPhysPc == GCPhysPc)
334 {
335 /*
336 * Check for interrupts and stuff.
337 */
338 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
339 * The main problems are the statistics and, to some degree, the logging. :/ */
340 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
341 {
342 /* Do polling. */
343 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
344 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
345 {
346 /*
347 * Success. Update statistics and switch to the next TB.
348 */
349 if (a_fWithIrqCheck)
350 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
351 else
352 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
353
354 pNewTb->cUsed += 1;
355 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
356 pVCpu->iem.s.pCurTbR3 = pNewTb;
357 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
358 pVCpu->iem.s.cTbExecNative += 1;
359 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
360 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
361 return (uintptr_t)pNewTb->Native.paInstructions;
362 }
363 }
364 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
365 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
366 }
367 else
368 {
369 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
370 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
371 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
372 }
373 }
374 else
375 {
376 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
377 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
378 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
379 }
380 }
381 else
382 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
383#else
384 NOREF(fFlags);
385 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
386#endif
387
388 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
389 return 0;
390}
391
392
393/**
394 * Used by TB code when it wants to raise a \#DE.
395 */
396IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
397{
398 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
399 iemRaiseDivideErrorJmp(pVCpu);
400#ifndef _MSC_VER
401 return VINF_IEM_RAISED_XCPT; /* not reached */
402#endif
403}
404
405
406/**
407 * Used by TB code when it wants to raise a \#UD.
408 */
409IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
410{
411 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
412 iemRaiseUndefinedOpcodeJmp(pVCpu);
413#ifndef _MSC_VER
414 return VINF_IEM_RAISED_XCPT; /* not reached */
415#endif
416}
417
418
419/**
420 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
421 *
422 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
423 */
424IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
425{
426 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
427 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
428 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
429 iemRaiseUndefinedOpcodeJmp(pVCpu);
430 else
431 iemRaiseDeviceNotAvailableJmp(pVCpu);
432#ifndef _MSC_VER
433 return VINF_IEM_RAISED_XCPT; /* not reached */
434#endif
435}
436
437
438/**
439 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
440 *
441 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
442 */
443IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
444{
445 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
446 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
447 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
448 iemRaiseUndefinedOpcodeJmp(pVCpu);
449 else
450 iemRaiseDeviceNotAvailableJmp(pVCpu);
451#ifndef _MSC_VER
452 return VINF_IEM_RAISED_XCPT; /* not reached */
453#endif
454}
455
456
457/**
458 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
459 *
460 * See IEM_MC_CALL_AVX_XXX/IEM_MC_CALL_SSE_XXX.
461 */
462IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
463{
464 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
465 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
466 iemRaiseSimdFpExceptionJmp(pVCpu);
467 else
468 iemRaiseUndefinedOpcodeJmp(pVCpu);
469#ifndef _MSC_VER
470 return VINF_IEM_RAISED_XCPT; /* not reached */
471#endif
472}
473
474
475/**
476 * Used by TB code when it wants to raise a \#NM.
477 */
478IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
479{
480 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
481 iemRaiseDeviceNotAvailableJmp(pVCpu);
482#ifndef _MSC_VER
483 return VINF_IEM_RAISED_XCPT; /* not reached */
484#endif
485}
486
487
488/**
489 * Used by TB code when it wants to raise a \#GP(0).
490 */
491IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
492{
493 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
494 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
495#ifndef _MSC_VER
496 return VINF_IEM_RAISED_XCPT; /* not reached */
497#endif
498}
499
500
501/**
502 * Used by TB code when it wants to raise a \#MF.
503 */
504IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
505{
506 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
507 iemRaiseMathFaultJmp(pVCpu);
508#ifndef _MSC_VER
509 return VINF_IEM_RAISED_XCPT; /* not reached */
510#endif
511}
512
513
514/**
515 * Used by TB code when it wants to raise a \#XF.
516 */
517IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
518{
519 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
520 iemRaiseSimdFpExceptionJmp(pVCpu);
521#ifndef _MSC_VER
522 return VINF_IEM_RAISED_XCPT; /* not reached */
523#endif
524}
525
526
527/**
528 * Used by TB code when detecting opcode changes.
529 * @see iemThreadeFuncWorkerObsoleteTb
530 */
531IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
532{
533 /* We set fSafeToFree to false because we're being called in the context
534 of a TB callback function, which for native TBs means we cannot release
535 the executable memory until we've returned all the way back to iemTbExec, as
536 that return path goes via the native code generated for the TB. */
537 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
538 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
539 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
540 return VINF_IEM_REEXEC_BREAK;
541}
542
543
544/**
545 * Used by TB code when we need to switch to a TB with CS.LIM checking.
546 */
547IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
548{
549 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
550 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
551 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
552 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
553 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
554 return VINF_IEM_REEXEC_BREAK;
555}
556
557
558/**
559 * Used by TB code when we missed a PC check after a branch.
560 */
561IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
562{
563 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
564 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
565 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
566 pVCpu->iem.s.pbInstrBuf));
567 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
568 return VINF_IEM_REEXEC_BREAK;
569}
570
571
572
573/*********************************************************************************************************************************
574* Helpers: Segmented memory fetches and stores. *
575*********************************************************************************************************************************/
576
577/**
578 * Used by TB code to load unsigned 8-bit data w/ segmentation.
579 */
580IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
581{
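/* Note: when IEMNATIVE_WITH_TLB_LOOKUP_FETCH is defined the recompiled code is
   expected to do the TLB lookup inline and only call this helper on a miss,
   hence the *SafeJmp worker; otherwise the helper performs the whole access via
   the ordinary *Jmp worker. The same pattern repeats for the helpers below. */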
582#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
583 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
584#else
585 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
586#endif
587}
588
589
590/**
591 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
592 * to 16 bits.
593 */
594IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
595{
596#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
597 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
598#else
599 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
600#endif
601}
602
603
604/**
605 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
606 * to 32 bits.
607 */
608IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
609{
610#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
611 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
612#else
613 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
614#endif
615}
616
617/**
618 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
619 * to 64 bits.
620 */
621IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
622{
623#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
624 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
625#else
626 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
627#endif
628}
629
630
631/**
632 * Used by TB code to load unsigned 16-bit data w/ segmentation.
633 */
634IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
635{
636#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
637 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
638#else
639 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
640#endif
641}
642
643
644/**
645 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
646 * to 32 bits.
647 */
648IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
649{
650#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
651 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
652#else
653 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
654#endif
655}
656
657
658/**
659 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
660 * to 64 bits.
661 */
662IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
663{
664#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
665 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
666#else
667 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
668#endif
669}
670
671
672/**
673 * Used by TB code to load unsigned 32-bit data w/ segmentation.
674 */
675IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
676{
677#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
678 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
679#else
680 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
681#endif
682}
683
684
685/**
686 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
687 * to 64 bits.
688 */
689IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
690{
691#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
692 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
693#else
694 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
695#endif
696}
697
698
699/**
700 * Used by TB code to load unsigned 64-bit data w/ segmentation.
701 */
702IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
703{
704#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
705 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
706#else
707 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
708#endif
709}
710
711
712#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
713/**
714 * Used by TB code to load 128-bit data w/ segmentation.
715 */
716IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
717{
718#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
719 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
720#else
721 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
722#endif
723}
724
725
726/**
727 * Used by TB code to load 128-bit data w/ segmentation.
728 */
729IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
730{
731#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
732 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
733#else
734 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
735#endif
736}
737
738
739/**
740 * Used by TB code to load 128-bit data w/ segmentation.
741 */
742IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
743{
744#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
745 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
746#else
747 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
748#endif
749}
750
751
752/**
753 * Used by TB code to load 256-bit data w/ segmentation.
754 */
755IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
756{
757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
758 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
759#else
760 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
761#endif
762}
763
764
765/**
766 * Used by TB code to load 256-bit data w/ segmentation.
767 */
768IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
769{
770#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
771 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
772#else
773 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
774#endif
775}
776#endif
777
778
779/**
780 * Used by TB code to store unsigned 8-bit data w/ segmentation.
781 */
782IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
783{
784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
785 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
786#else
787 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
788#endif
789}
790
791
792/**
793 * Used by TB code to store unsigned 16-bit data w/ segmentation.
794 */
795IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
796{
797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
798 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
799#else
800 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
801#endif
802}
803
804
805/**
806 * Used by TB code to store unsigned 32-bit data w/ segmentation.
807 */
808IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
809{
810#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
811 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
812#else
813 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
814#endif
815}
816
817
818/**
819 * Used by TB code to store unsigned 64-bit data w/ segmentation.
820 */
821IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
822{
823#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
824 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
825#else
826 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
827#endif
828}
829
830
831#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
832/**
833 * Used by TB code to store unsigned 128-bit data w/ segmentation.
834 */
835IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
836{
837#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
838 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
839#else
840 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
841#endif
842}
843
844
845/**
846 * Used by TB code to store unsigned 128-bit data w/ segmentation.
847 */
848IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
849{
850#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
851 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
852#else
853 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
854#endif
855}
856
857
858/**
859 * Used by TB code to store unsigned 256-bit data w/ segmentation.
860 */
861IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
862{
863#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
864 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
865#else
866 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
867#endif
868}
869
870
871/**
872 * Used by TB code to store unsigned 256-bit data w/ segmentation.
873 */
874IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
875{
876#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
877 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
878#else
879 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
880#endif
881}
882#endif
883
884
885
886/**
887 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
888 */
889IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
890{
891#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
892 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
893#else
894 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
895#endif
896}
897
898
899/**
900 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
901 */
902IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
903{
904#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
905 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
906#else
907 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
908#endif
909}
910
911
912/**
913 * Used by TB code to store a 32-bit selector value onto a generic stack.
914 *
915 * Intel CPUs don't write a whole dword, hence the special function.
916 */
917IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
918{
919#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
920 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
921#else
922 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
923#endif
924}
925
926
927/**
928 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
929 */
930IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
931{
932#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
933 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
934#else
935 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
936#endif
937}
938
939
940/**
941 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
942 */
943IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
944{
945#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
946 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
947#else
948 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
949#endif
950}
951
952
953/**
954 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
955 */
956IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
957{
958#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
959 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
960#else
961 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
962#endif
963}
964
965
966/**
967 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
968 */
969IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
970{
971#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
972 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
973#else
974 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
975#endif
976}
977
978
979
980/*********************************************************************************************************************************
981* Helpers: Flat memory fetches and stores. *
982*********************************************************************************************************************************/
983
984/**
985 * Used by TB code to load unsigned 8-bit data w/ flat address.
986 * @note Zero extending the value to 64-bit to simplify assembly.
987 */
988IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
989{
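/* Passing UINT8_MAX as the segment register index tells the worker that the
   address is already flat (no segmentation applied). */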
990#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
991 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
992#else
993 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
994#endif
995}
996
997
998/**
999 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1000 * to 16 bits.
1001 * @note Zero extending the value to 64-bit to simplify assembly.
1002 */
1003IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1004{
1005#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1006 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1007#else
1008 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1009#endif
1010}
1011
1012
1013/**
1014 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1015 * to 32 bits.
1016 * @note Zero extending the value to 64-bit to simplify assembly.
1017 */
1018IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1019{
1020#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1021 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1022#else
1023 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1024#endif
1025}
1026
1027
1028/**
1029 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1030 * to 64 bits.
1031 */
1032IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1033{
1034#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1035 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1036#else
1037 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1038#endif
1039}
1040
1041
1042/**
1043 * Used by TB code to load unsigned 16-bit data w/ flat address.
1044 * @note Zero extending the value to 64-bit to simplify assembly.
1045 */
1046IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1047{
1048#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1049 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1050#else
1051 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1052#endif
1053}
1054
1055
1056/**
1057 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1058 * to 32 bits.
1059 * @note Zero extending the value to 64-bit to simplify assembly.
1060 */
1061IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1062{
1063#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1064 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1065#else
1066 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1067#endif
1068}
1069
1070
1071/**
1072 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1073 * to 64 bits.
1074 * @note Zero extending the value to 64-bit to simplify assembly.
1075 */
1076IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1077{
1078#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1079 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1080#else
1081 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1082#endif
1083}
1084
1085
1086/**
1087 * Used by TB code to load unsigned 32-bit data w/ flat address.
1088 * @note Zero extending the value to 64-bit to simplify assembly.
1089 */
1090IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1091{
1092#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1093 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1094#else
1095 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1096#endif
1097}
1098
1099
1100/**
1101 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1102 * to 64 bits.
1103 * @note Zero extending the value to 64-bit to simplify assembly.
1104 */
1105IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1106{
1107#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1108 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1109#else
1110 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1111#endif
1112}
1113
1114
1115/**
1116 * Used by TB code to load unsigned 64-bit data w/ flat address.
1117 */
1118IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1119{
1120#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1121 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1122#else
1123 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1124#endif
1125}
1126
1127
1128#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1129/**
1130 * Used by TB code to load unsigned 128-bit data w/ flat address.
1131 */
1132IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1133{
1134#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1135 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1136#else
1137 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1138#endif
1139}
1140
1141
1142/**
1143 * Used by TB code to load unsigned 128-bit data w/ flat address.
1144 */
1145IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1146{
1147#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1148 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1149#else
1150 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1151#endif
1152}
1153
1154
1155/**
1156 * Used by TB code to load unsigned 128-bit data w/ flat address.
1157 */
1158IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1159{
1160#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1161 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1162#else
1163 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1164#endif
1165}
1166
1167
1168/**
1169 * Used by TB code to load unsigned 256-bit data w/ flat address.
1170 */
1171IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1172{
1173#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1174 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1175#else
1176 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1177#endif
1178}
1179
1180
1181/**
1182 * Used by TB code to load unsigned 256-bit data w/ flat address.
1183 */
1184IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1185{
1186#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1187 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1188#else
1189 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1190#endif
1191}
1192#endif
1193
1194
1195/**
1196 * Used by TB code to store unsigned 8-bit data w/ flat address.
1197 */
1198IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1199{
1200#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1201 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1202#else
1203 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1204#endif
1205}
1206
1207
1208/**
1209 * Used by TB code to store unsigned 16-bit data w/ flat address.
1210 */
1211IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1212{
1213#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1214 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1215#else
1216 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1217#endif
1218}
1219
1220
1221/**
1222 * Used by TB code to store unsigned 32-bit data w/ flat address.
1223 */
1224IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1225{
1226#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1227 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1228#else
1229 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1230#endif
1231}
1232
1233
1234/**
1235 * Used by TB code to store unsigned 64-bit data w/ flat address.
1236 */
1237IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1238{
1239#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1240 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1241#else
1242 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1243#endif
1244}
1245
1246
1247#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1248/**
1249 * Used by TB code to store unsigned 128-bit data w/ flat address.
1250 */
1251IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1252{
1253#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1254 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1255#else
1256 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1257#endif
1258}
1259
1260
1261/**
1262 * Used by TB code to store unsigned 128-bit data w/ flat address.
1263 */
1264IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1265{
1266#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1267 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1268#else
1269 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1270#endif
1271}
1272
1273
1274/**
1275 * Used by TB code to store unsigned 256-bit data w/ flat address.
1276 */
1277IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1278{
1279#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1280 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1281#else
1282 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1283#endif
1284}
1285
1286
1287/**
1288 * Used by TB code to store unsigned 256-bit data w/ flat address.
1289 */
1290IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1291{
1292#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1293 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1294#else
1295 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1296#endif
1297}
1298#endif
1299
1300
1301
1302/**
1303 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1304 */
1305IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1306{
1307#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1308 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1309#else
1310 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1311#endif
1312}
1313
1314
1315/**
1316 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1317 */
1318IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1319{
1320#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1321 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1322#else
1323 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1324#endif
1325}
1326
1327
1328/**
1329 * Used by TB code to store a segment selector value onto a flat stack.
1330 *
1331 * Intel CPUs don't write a whole dword, hence the special function.
1332 */
1333IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1334{
1335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1336 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1337#else
1338 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1339#endif
1340}
1341
1342
1343/**
1344 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1345 */
1346IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1347{
1348#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1349 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1350#else
1351 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1352#endif
1353}
1354
1355
1356/**
1357 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1358 */
1359IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1360{
1361#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1362 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1363#else
1364 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1365#endif
1366}
1367
1368
1369/**
1370 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1371 */
1372IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1373{
1374#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1375 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1376#else
1377 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1378#endif
1379}
1380
1381
1382/**
1383 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1384 */
1385IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1386{
1387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1388 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1389#else
1390 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1391#endif
1392}
1393
1394
1395
1396/*********************************************************************************************************************************
1397* Helpers: Segmented memory mapping. *
1398*********************************************************************************************************************************/
1399
1400/**
1401 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1402 * segmentation.
1403 */
1404IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1405 RTGCPTR GCPtrMem, uint8_t iSegReg))
1406{
1407#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1408 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1409#else
1410 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1411#endif
1412}
1413
1414
1415/**
1416 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1417 */
1418IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1419 RTGCPTR GCPtrMem, uint8_t iSegReg))
1420{
1421#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1422 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1423#else
1424 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1425#endif
1426}
1427
1428
1429/**
1430 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1431 */
1432IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1433 RTGCPTR GCPtrMem, uint8_t iSegReg))
1434{
1435#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1436 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1437#else
1438 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1439#endif
1440}
1441
1442
1443/**
1444 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1445 */
1446IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1447 RTGCPTR GCPtrMem, uint8_t iSegReg))
1448{
1449#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1450 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1451#else
1452 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1453#endif
1454}
1455
1456
1457/**
1458 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1459 * segmentation.
1460 */
1461IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1462 RTGCPTR GCPtrMem, uint8_t iSegReg))
1463{
1464#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1465 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1466#else
1467 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1468#endif
1469}
1470
1471
1472/**
1473 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1474 */
1475IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1476 RTGCPTR GCPtrMem, uint8_t iSegReg))
1477{
1478#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1479 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1480#else
1481 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1482#endif
1483}
1484
1485
1486/**
1487 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1488 */
1489IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1490 RTGCPTR GCPtrMem, uint8_t iSegReg))
1491{
1492#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1493 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1494#else
1495 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1496#endif
1497}
1498
1499
1500/**
1501 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1502 */
1503IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1504 RTGCPTR GCPtrMem, uint8_t iSegReg))
1505{
1506#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1507 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1508#else
1509 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1510#endif
1511}
1512
1513
1514/**
1515 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1516 * segmentation.
1517 */
1518IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1519 RTGCPTR GCPtrMem, uint8_t iSegReg))
1520{
1521#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1522 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1523#else
1524 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1525#endif
1526}
1527
1528
1529/**
1530 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1531 */
1532IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1533 RTGCPTR GCPtrMem, uint8_t iSegReg))
1534{
1535#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1536 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1537#else
1538 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1539#endif
1540}
1541
1542
1543/**
1544 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1545 */
1546IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1547 RTGCPTR GCPtrMem, uint8_t iSegReg))
1548{
1549#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1550 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1551#else
1552 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1553#endif
1554}
1555
1556
1557/**
1558 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1559 */
1560IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1561 RTGCPTR GCPtrMem, uint8_t iSegReg))
1562{
1563#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1564 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1565#else
1566 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1567#endif
1568}
1569
1570
1571/**
1572 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1573 * segmentation.
1574 */
1575IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1576 RTGCPTR GCPtrMem, uint8_t iSegReg))
1577{
1578#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1579 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1580#else
1581 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1582#endif
1583}
1584
1585
1586/**
1587 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1588 */
1589IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1590 RTGCPTR GCPtrMem, uint8_t iSegReg))
1591{
1592#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1593 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1594#else
1595 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1596#endif
1597}
1598
1599
1600/**
1601 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1602 */
1603IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1604 RTGCPTR GCPtrMem, uint8_t iSegReg))
1605{
1606#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1607 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1608#else
1609 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1610#endif
1611}
1612
1613
1614/**
1615 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1616 */
1617IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1618 RTGCPTR GCPtrMem, uint8_t iSegReg))
1619{
1620#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1621 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1622#else
1623 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1624#endif
1625}
1626
1627
1628/**
1629 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1630 */
1631IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1632 RTGCPTR GCPtrMem, uint8_t iSegReg))
1633{
1634#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1635 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1636#else
1637 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1638#endif
1639}
1640
1641
1642/**
1643 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1644 */
1645IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1646 RTGCPTR GCPtrMem, uint8_t iSegReg))
1647{
1648#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1649 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1650#else
1651 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1652#endif
1653}
1654
1655
1656/**
1657 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1658 * segmentation.
1659 */
1660IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1661 RTGCPTR GCPtrMem, uint8_t iSegReg))
1662{
1663#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1664 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1665#else
1666 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1667#endif
1668}
1669
1670
1671/**
1672 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1673 */
1674IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1675 RTGCPTR GCPtrMem, uint8_t iSegReg))
1676{
1677#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1678 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1679#else
1680 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1681#endif
1682}
1683
1684
1685/**
1686 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1687 */
1688IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1689 RTGCPTR GCPtrMem, uint8_t iSegReg))
1690{
1691#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1692 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1693#else
1694 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1695#endif
1696}
1697
1698
1699/**
1700 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1701 */
1702IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1703 RTGCPTR GCPtrMem, uint8_t iSegReg))
1704{
1705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1706 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1707#else
1708 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1709#endif
1710}
1711
1712
1713/*********************************************************************************************************************************
1714* Helpers: Flat memory mapping. *
1715*********************************************************************************************************************************/
1716
1717/**
1718 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1719 * address.
1720 */
1721IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1722{
1723#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1724 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1725#else
1726 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1727#endif
1728}
1729
1730
1731/**
1732 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1733 */
1734IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1735{
1736#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1737 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1738#else
1739 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1740#endif
1741}
1742
1743
1744/**
1745 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1746 */
1747IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1748{
1749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1750 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1751#else
1752 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1753#endif
1754}
1755
1756
1757/**
1758 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1759 */
1760IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1761{
1762#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1763 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1764#else
1765 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1766#endif
1767}
1768
1769
1770/**
1771 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1772 * address.
1773 */
1774IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1775{
1776#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1777 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1778#else
1779 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1780#endif
1781}
1782
1783
1784/**
1785 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1786 */
1787IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1788{
1789#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1790 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1791#else
1792 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1793#endif
1794}
1795
1796
1797/**
1798 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1799 */
1800IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1801{
1802#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1803 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1804#else
1805 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1806#endif
1807}
1808
1809
1810/**
1811 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1812 */
1813IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1814{
1815#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1816 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1817#else
1818 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1819#endif
1820}
1821
1822
1823/**
1824 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1825 * address.
1826 */
1827IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1828{
1829#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1830 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1831#else
1832 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1833#endif
1834}
1835
1836
1837/**
1838 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1839 */
1840IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1841{
1842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1843 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1844#else
1845 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1846#endif
1847}
1848
1849
1850/**
1851 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1852 */
1853IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1854{
1855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1856 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1857#else
1858 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1859#endif
1860}
1861
1862
1863/**
1864 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1865 */
1866IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1867{
1868#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1869 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1870#else
1871 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1872#endif
1873}
1874
1875
1876/**
1877 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1878 * address.
1879 */
1880IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1881{
1882#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1883 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1884#else
1885 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1886#endif
1887}
1888
1889
1890/**
1891 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1892 */
1893IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1894{
1895#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1896 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1897#else
1898 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1899#endif
1900}
1901
1902
1903/**
1904 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1905 */
1906IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1907{
1908#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1909 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1910#else
1911 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1912#endif
1913}
1914
1915
1916/**
1917 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1922 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1923#else
1924 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1925#endif
1926}
1927
1928
1929/**
1930 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1931 */
1932IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1933{
1934#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1935 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1936#else
1937 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1938#endif
1939}
1940
1941
1942/**
1943 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1944 */
1945IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1946{
1947#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1948 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1949#else
1950 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1951#endif
1952}
1953
1954
1955/**
1956 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1957 * address.
1958 */
1959IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1960{
1961#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1962 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1963#else
1964 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1965#endif
1966}
1967
1968
1969/**
1970 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1971 */
1972IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1973{
1974#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1975 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1976#else
1977 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1978#endif
1979}
1980
1981
1982/**
1983 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1984 */
1985IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1986{
1987#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1988 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1989#else
1990 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1991#endif
1992}
1993
1994
1995/**
1996 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1997 */
1998IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1999{
2000#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2001 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2002#else
2003 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2004#endif
2005}
2006
2007
2008/*********************************************************************************************************************************
2009* Helpers: Commit, rollback & unmap *
2010*********************************************************************************************************************************/
2011
2012/**
2013 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2014 */
2015IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2016{
2017 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2018}
2019
2020
2021/**
2022 * Used by TB code to commit and unmap a read-write memory mapping.
2023 */
2024IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2025{
2026 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2027}
2028
2029
2030/**
2031 * Used by TB code to commit and unmap a write-only memory mapping.
2032 */
2033IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2034{
2035 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2036}
2037
2038
2039/**
2040 * Used by TB code to commit and unmap a read-only memory mapping.
2041 */
2042IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2043{
2044 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2045}
2046
2047
2048/**
2049 * Reinitializes the native recompiler state.
2050 *
2051 * Called before starting a new recompile job.
2052 */
2053static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2054{
2055 pReNative->cLabels = 0;
2056 pReNative->bmLabelTypes = 0;
2057 pReNative->cFixups = 0;
2058#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2059 pReNative->cTbExitFixups = 0;
2060#endif
2061#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2062 pReNative->pDbgInfo->cEntries = 0;
2063 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2064#endif
2065 pReNative->pTbOrg = pTb;
2066 pReNative->cCondDepth = 0;
2067 pReNative->uCondSeqNo = 0;
2068 pReNative->uCheckIrqSeqNo = 0;
2069 pReNative->uTlbSeqNo = 0;
2070
2071#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2072 pReNative->Core.offPc = 0;
2073# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(VBOX_WITH_STATISTICS)
2074 pReNative->Core.idxInstrPlusOneOfLastPcUpdate = 0;
2075# endif
2076# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2077 pReNative->Core.fDebugPcInitialized = false;
2078# endif
2079#endif
2080#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2081 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2082#endif
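    /* Note: all fixed host registers, plus any bit positions beyond the actual
       host GPR count, start out marked as allocated below so the register
       allocator never hands them out. */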
2083 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2084#if IEMNATIVE_HST_GREG_COUNT < 32
2085 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2086#endif
2087 ;
2088 pReNative->Core.bmHstRegsWithGstShadow = 0;
2089 pReNative->Core.bmGstRegShadows = 0;
2090#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2091 pReNative->Core.bmGstRegShadowDirty = 0;
2092#endif
2093 pReNative->Core.bmVars = 0;
2094 pReNative->Core.bmStack = 0;
2095 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2096 pReNative->Core.u64ArgVars = UINT64_MAX;
2097
2098 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 23);
2099 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2100 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2101 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2102 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2103 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2104 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2105 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2106 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2107 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2108 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2110 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2111 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2112 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2113 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2114 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2115 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2116 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2117 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2118 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2119 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2120 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2121 pReNative->aidxUniqueLabels[22] = UINT32_MAX;
2122
2123 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2124
2125 /* Full host register reinit: */
2126 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2127 {
2128 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2129 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2130 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2131 }
2132
2133 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2134 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2135#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2136 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2137#endif
2138#ifdef IEMNATIVE_REG_FIXED_TMP0
2139 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2140#endif
2141#ifdef IEMNATIVE_REG_FIXED_TMP1
2142 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2143#endif
2144#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2145 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2146#endif
2147 );
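    /* Mark whatever else remains in the fixed mask as generically reserved;
       the well-known fixed registers get their specific roles assigned right
       after this loop. */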
2148 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2149 {
2150 fRegs &= ~RT_BIT_32(idxReg);
2151        pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2152 }
2153
2154 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2155#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2156 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2157#endif
2158#ifdef IEMNATIVE_REG_FIXED_TMP0
2159 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2160#endif
2161#ifdef IEMNATIVE_REG_FIXED_TMP1
2162 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2163#endif
2164#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2165 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2166#endif
2167
2168#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2169 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2170# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2171 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2172# endif
2173 ;
2174 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2175 pReNative->Core.bmGstSimdRegShadows = 0;
2176 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2177 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2178
2179 /* Full host register reinit: */
2180 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2181 {
2182 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2183 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2184 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2185 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2186 }
2187
2188 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2189 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2190 {
2191 fRegs &= ~RT_BIT_32(idxReg);
2192 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2193 }
2194
2195#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2196 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2197#endif
2198
2199#endif
2200
2201 return pReNative;
2202}
2203
2204
2205/**
2206 * Used when done emitting the per-chunk code and for iemNativeInit bailout.
2207 */
2208static void iemNativeTerm(PIEMRECOMPILERSTATE pReNative)
2209{
2210 RTMemFree(pReNative->pInstrBuf);
2211 RTMemFree(pReNative->paLabels);
2212 RTMemFree(pReNative->paFixups);
2213#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2214 RTMemFree(pReNative->paTbExitFixups);
2215#endif
2216#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2217 RTMemFree(pReNative->pDbgInfo);
2218#endif
2219 RTMemFree(pReNative);
2220}
2221
2222
2223/**
2224 * Allocates and initializes the native recompiler state.
2225 *
2226 * This is called the first time an EMT wants to recompile something.
2227 *
2228 * @returns Pointer to the new recompiler state.
2229 * @param pVCpu The cross context virtual CPU structure of the calling
2230 * thread.
2231 * @param pTb The TB that's about to be recompiled. When this is NULL,
2232 * the recompiler state is for emitting the common per-chunk
2233 * code from iemNativeRecompileAttachExecMemChunkCtx.
2234 * @thread EMT(pVCpu)
2235 */
2236static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2237{
2238 VMCPU_ASSERT_EMT(pVCpu);
2239
2240 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2241 AssertReturn(pReNative, NULL);
2242
2243 /*
2244 * Try allocate all the buffers and stuff we need.
2245 */
2246 uint32_t const cFactor = pTb ? 1 : 32 /* per-chunk stuff doesn't really need anything but the code buffer */;
2247 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2248 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K / cFactor);
2249 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K / cFactor);
2250#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2251 pReNative->paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemAllocZ(sizeof(IEMNATIVEEXITFIXUP) * _8K / cFactor);
2252#endif
2253#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2254 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K / cFactor]));
2255#endif
2256 if (RT_LIKELY( pReNative->pInstrBuf
2257 && pReNative->paLabels
2258 && pReNative->paFixups)
2259#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2260 && pReNative->paTbExitFixups
2261#endif
2262#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2263 && pReNative->pDbgInfo
2264#endif
2265 )
2266 {
2267 /*
2268 * Set the buffer & array sizes on success.
2269 */
2270 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2271 pReNative->cLabelsAlloc = _8K / cFactor;
2272 pReNative->cFixupsAlloc = _16K / cFactor;
2273#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2274 pReNative->cTbExitFixupsAlloc = _8K / cFactor;
2275#endif
2276#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2277 pReNative->cDbgInfoAlloc = _16K / cFactor;
2278#endif
2279
2280 /* Other constant stuff: */
2281 pReNative->pVCpu = pVCpu;
2282
2283 /*
2284 * Done, just reinit it.
2285 */
2286 return iemNativeReInit(pReNative, pTb);
2287 }
2288
2289 /*
2290 * Failed. Cleanup and return.
2291 */
2292 AssertFailed();
2293 iemNativeTerm(pReNative);
2294 return NULL;
2295}
2296
2297
2298/**
2299 * Creates a label
2300 *
2301 * If the label does not yet have a defined position,
2302 * call iemNativeLabelDefine() later to set it.
2303 *
2304 * @returns Label ID. Throws VBox status code on failure, so no need to check
2305 * the return value.
2306 * @param pReNative The native recompile state.
2307 * @param enmType The label type.
2308 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2309 * label is not yet defined (default).
2310 * @param   uData       Data associated with the label. Only applicable to
2311 *                      certain types of labels. Default is zero.
2312 */
2313DECL_HIDDEN_THROW(uint32_t)
2314iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2315 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2316{
2317 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2318#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
2319 Assert(enmType >= kIemNativeLabelType_LoopJumpTarget);
2320#endif
2321
2322 /*
2323 * Locate existing label definition.
2324 *
2325 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2326 * and uData is zero.
2327 */
2328 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2329 uint32_t const cLabels = pReNative->cLabels;
2330 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2331#ifndef VBOX_STRICT
2332 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2333 && offWhere == UINT32_MAX
2334 && uData == 0
2335#endif
2336 )
2337 {
2338#ifndef VBOX_STRICT
2339 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2340 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2341 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2342 if (idxLabel < pReNative->cLabels)
2343 return idxLabel;
2344#else
2345 for (uint32_t i = 0; i < cLabels; i++)
2346 if ( paLabels[i].enmType == enmType
2347 && paLabels[i].uData == uData)
2348 {
2349 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2350 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2351 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2352 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2353 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2354 return i;
2355 }
2356 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2357 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2358#endif
2359 }
2360
2361 /*
2362 * Make sure we've got room for another label.
2363 */
2364 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2365 { /* likely */ }
2366 else
2367 {
2368 uint32_t cNew = pReNative->cLabelsAlloc;
2369 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2370 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2371 cNew *= 2;
2372        AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2373 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2374 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2375 pReNative->paLabels = paLabels;
2376 pReNative->cLabelsAlloc = cNew;
2377 }
2378
2379 /*
2380 * Define a new label.
2381 */
2382 paLabels[cLabels].off = offWhere;
2383 paLabels[cLabels].enmType = enmType;
2384 paLabels[cLabels].uData = uData;
2385 pReNative->cLabels = cLabels + 1;
2386
2387 Assert((unsigned)enmType < 64);
2388 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2389
2390 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2391 {
2392 Assert(uData == 0);
2393 pReNative->aidxUniqueLabels[enmType] = cLabels;
2394 }
2395
2396 if (offWhere != UINT32_MAX)
2397 {
2398#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2399 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2400 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2401#endif
2402 }
2403 return cLabels;
2404}
2405
2406
2407/**
2408 * Defines the location of an existing label.
2409 *
2410 * @param pReNative The native recompile state.
2411 * @param idxLabel The label to define.
2412 * @param offWhere The position.
2413 */
2414DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2415{
2416 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2417 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2418 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2419 pLabel->off = offWhere;
2420#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2421 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2422 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2423#endif
2424}
2425
2426
2427/**
2428 * Looks up a label.
2429 *
2430 * @returns Label ID if found, UINT32_MAX if not.
2431 */
2432DECLHIDDEN(uint32_t) iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2433 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
2434{
2435 Assert((unsigned)enmType < 64);
2436 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2437 {
2438 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2439 return pReNative->aidxUniqueLabels[enmType];
2440
2441 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2442 uint32_t const cLabels = pReNative->cLabels;
2443 for (uint32_t i = 0; i < cLabels; i++)
2444 if ( paLabels[i].enmType == enmType
2445 && paLabels[i].uData == uData
2446 && ( paLabels[i].off == offWhere
2447 || offWhere == UINT32_MAX
2448 || paLabels[i].off == UINT32_MAX))
2449 return i;
2450 }
2451 return UINT32_MAX;
2452}
2453
2454
2455/**
2456 * Adds a fixup.
2457 *
2458 * @throws VBox status code (int) on failure.
2459 * @param pReNative The native recompile state.
2460 * @param offWhere The instruction offset of the fixup location.
2461 * @param idxLabel The target label ID for the fixup.
2462 * @param enmType The fixup type.
2463 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2464 */
2465DECL_HIDDEN_THROW(void)
2466iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2467 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2468{
2469 Assert(idxLabel <= UINT16_MAX);
2470 Assert((unsigned)enmType <= UINT8_MAX);
2471#ifdef RT_ARCH_ARM64
2472 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2473 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2474 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2475#endif
2476
2477 /*
2478 * Make sure we've room.
2479 */
2480 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2481 uint32_t const cFixups = pReNative->cFixups;
2482 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2483 { /* likely */ }
2484 else
2485 {
2486 uint32_t cNew = pReNative->cFixupsAlloc;
2487 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2488 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2489 cNew *= 2;
2490 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2491 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2492 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2493 pReNative->paFixups = paFixups;
2494 pReNative->cFixupsAlloc = cNew;
2495 }
2496
2497 /*
2498 * Add the fixup.
2499 */
2500 paFixups[cFixups].off = offWhere;
2501 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2502 paFixups[cFixups].enmType = enmType;
2503 paFixups[cFixups].offAddend = offAddend;
2504 pReNative->cFixups = cFixups + 1;
2505}
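/* Illustrative use of the label/fixup API above (a sketch, not code emitted
   anywhere in particular): a forward label is created without a position,
   branches emitted against it are recorded with iemNativeAddFixup(), and the
   label is defined once the target offset is known.  The enmSomeLabelType,
   enmSomeFixupType and offBranch names are placeholders:

       uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType);
       // ... emit the branch instruction at offBranch ...
       iemNativeAddFixup(pReNative, offBranch, idxLabel, enmSomeFixupType);
       // ... emit more code ...
       iemNativeLabelDefine(pReNative, idxLabel, off);
*/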
2506
2507
2508#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2509/**
2510 * Adds a fixup to the per chunk tail code.
2511 *
2512 * @throws VBox status code (int) on failure.
2513 * @param pReNative The native recompile state.
2514 * @param offWhere The instruction offset of the fixup location.
2515 * @param enmExitReason The exit reason to jump to.
2516 */
2517DECL_HIDDEN_THROW(void)
2518iemNativeAddTbExitFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, IEMNATIVELABELTYPE enmExitReason)
2519{
2520 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
2521
2522 /*
2523 * Make sure we've room.
2524 */
2525 PIEMNATIVEEXITFIXUP paTbExitFixups = pReNative->paTbExitFixups;
2526 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
2527 if (RT_LIKELY(cTbExitFixups < pReNative->cTbExitFixupsAlloc))
2528 { /* likely */ }
2529 else
2530 {
2531 uint32_t cNew = pReNative->cTbExitFixupsAlloc;
2532 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2533 AssertStmt(cTbExitFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2534 cNew *= 2;
2535 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2536 paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemRealloc(paTbExitFixups, cNew * sizeof(paTbExitFixups[0]));
2537 AssertStmt(paTbExitFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2538 pReNative->paTbExitFixups = paTbExitFixups;
2539 pReNative->cTbExitFixupsAlloc = cNew;
2540 }
2541
2542 /*
2543 * Add the fixup.
2544 */
2545 paTbExitFixups[cTbExitFixups].off = offWhere;
2546 paTbExitFixups[cTbExitFixups].enmExitReason = enmExitReason;
2547 pReNative->cTbExitFixups = cTbExitFixups + 1;
2548}
2549#endif
2550
2551
2552/**
2553 * Slow code path for iemNativeInstrBufEnsure.
2554 */
2555DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2556{
2557 /* Double the buffer size till we meet the request. */
2558 uint32_t cNew = pReNative->cInstrBufAlloc;
2559 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2560 do
2561 cNew *= 2;
2562 while (cNew < off + cInstrReq);
2563
2564 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2565#ifdef RT_ARCH_ARM64
2566 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2567#else
2568 uint32_t const cbMaxInstrBuf = _2M;
2569#endif
2570 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2571
2572 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2573 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2574
2575#ifdef VBOX_STRICT
2576 pReNative->offInstrBufChecked = off + cInstrReq;
2577#endif
2578 pReNative->cInstrBufAlloc = cNew;
2579 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2580}
2581
2582#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2583
2584/**
2585 * Grows the static debug info array used during recompilation.
2586 *
2587 * @returns Pointer to the new debug info block; throws VBox status code on
2588 * failure, so no need to check the return value.
2589 */
2590DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2591{
2592 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2593 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2594 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2595 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2596 pReNative->pDbgInfo = pDbgInfo;
2597 pReNative->cDbgInfoAlloc = cNew;
2598 return pDbgInfo;
2599}
2600
2601
2602/**
2603 * Adds a new debug info uninitialized entry, returning the pointer to it.
2604 */
2605DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2606{
2607 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2608 { /* likely */ }
2609 else
2610 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2611 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2612}
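/* All the iemNativeDbgInfoAdd* record helpers below funnel through the
   function above, growing the debug info entry array on demand. */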
2613
2614
2615/**
2616 * Debug Info: Adds a native offset record, if necessary.
2617 */
2618DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2619{
2620 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2621
2622 /*
2623 * Do we need this one?
2624 */
2625 uint32_t const offPrev = pDbgInfo->offNativeLast;
2626 if (offPrev == off)
2627 return;
2628 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2629
2630 /*
2631 * Add it.
2632 */
2633 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2634 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2635 pEntry->NativeOffset.offNative = off;
2636 pDbgInfo->offNativeLast = off;
2637}
2638
2639
2640/**
2641 * Debug Info: Record info about a label.
2642 */
2643static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2644{
2645 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2646 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2647 pEntry->Label.uUnused = 0;
2648 pEntry->Label.enmLabel = (uint8_t)enmType;
2649 pEntry->Label.uData = uData;
2650}
2651
2652
2653/**
2654 * Debug Info: Record info about a threaded call.
2655 */
2656static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2657{
2658 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2659 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2660 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2661 pEntry->ThreadedCall.uUnused = 0;
2662 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2663}
2664
2665
2666/**
2667 * Debug Info: Record info about a new guest instruction.
2668 */
2669static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2670{
2671 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2672 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2673 pEntry->GuestInstruction.uUnused = 0;
2674 pEntry->GuestInstruction.fExec = fExec;
2675}
2676
2677
2678/**
2679 * Debug Info: Record info about guest register shadowing.
2680 */
2681DECL_HIDDEN_THROW(void)
2682iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2683 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2684{
2685 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2686 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2687 pEntry->GuestRegShadowing.uUnused = 0;
2688 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2689 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2690 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2691#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2692 Assert( idxHstReg != UINT8_MAX
2693 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2694#endif
2695}
2696
2697
2698# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2699/**
2700 * Debug Info: Record info about guest register shadowing.
2701 */
2702DECL_HIDDEN_THROW(void)
2703iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2704 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2705{
2706 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2707 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2708 pEntry->GuestSimdRegShadowing.uUnused = 0;
2709 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2710 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2711 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2712}
2713# endif
2714
2715
2716# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2717/**
2718 * Debug Info: Record info about delayed RIP updates.
2719 */
2720DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint64_t offPc, uint32_t cInstrSkipped)
2721{
2722 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2723 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2724 pEntry->DelayedPcUpdate.offPc = offPc; /** @todo support larger values */
2725 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2726}
2727# endif
2728
2729# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2730
2731/**
2732 * Debug Info: Record info about a dirty guest register.
2733 */
2734DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2735 uint8_t idxGstReg, uint8_t idxHstReg)
2736{
2737 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2738 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2739 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2740 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2741 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2742}
2743
2744
2745/**
2746 * Debug Info: Record info about a dirty guest register writeback operation.
2747 */
2748DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2749{
2750 unsigned const cBitsGstRegMask = 25;
2751 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
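    /* The 64-bit dirty mask is emitted as up to three debug entries covering
       25 + 25 + 14 bits; cShift records which 25-bit chunk an entry holds
       (0, 1 or 2). */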
2752
2753 /* The first block of 25 bits: */
2754 if (fGstReg & fGstRegMask)
2755 {
2756 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2757 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2758 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2759 pEntry->GuestRegWriteback.cShift = 0;
2760 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2761 fGstReg &= ~(uint64_t)fGstRegMask;
2762 if (!fGstReg)
2763 return;
2764 }
2765
2766 /* The second block of 25 bits: */
2767 fGstReg >>= cBitsGstRegMask;
2768 if (fGstReg & fGstRegMask)
2769 {
2770 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2771 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2772 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2773        pEntry->GuestRegWriteback.cShift    = 1;
2774 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2775 fGstReg &= ~(uint64_t)fGstRegMask;
2776 if (!fGstReg)
2777 return;
2778 }
2779
2780 /* The last block with 14 bits: */
2781 fGstReg >>= cBitsGstRegMask;
2782 Assert(fGstReg & fGstRegMask);
2783 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2784 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2785 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2786 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2787 pEntry->GuestRegWriteback.cShift = 2;
2788 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2789}
2790
2791# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2792
2793#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2794
2795
2796/*********************************************************************************************************************************
2797* Register Allocator *
2798*********************************************************************************************************************************/
2799
2800/**
2801 * Register parameter indexes (indexed by argument number).
2802 */
2803DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2804{
2805 IEMNATIVE_CALL_ARG0_GREG,
2806 IEMNATIVE_CALL_ARG1_GREG,
2807 IEMNATIVE_CALL_ARG2_GREG,
2808 IEMNATIVE_CALL_ARG3_GREG,
2809#if defined(IEMNATIVE_CALL_ARG4_GREG)
2810 IEMNATIVE_CALL_ARG4_GREG,
2811# if defined(IEMNATIVE_CALL_ARG5_GREG)
2812 IEMNATIVE_CALL_ARG5_GREG,
2813# if defined(IEMNATIVE_CALL_ARG6_GREG)
2814 IEMNATIVE_CALL_ARG6_GREG,
2815# if defined(IEMNATIVE_CALL_ARG7_GREG)
2816 IEMNATIVE_CALL_ARG7_GREG,
2817# endif
2818# endif
2819# endif
2820#endif
2821};
2822AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2823
2824/**
2825 * Call register masks indexed by argument count.
2826 */
2827DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2828{
2829 0,
2830 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2831 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2832 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2833 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2834 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2835#if defined(IEMNATIVE_CALL_ARG4_GREG)
2836 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2837 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2838# if defined(IEMNATIVE_CALL_ARG5_GREG)
2839 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2840 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2841# if defined(IEMNATIVE_CALL_ARG6_GREG)
2842 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2843 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2844 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2845# if defined(IEMNATIVE_CALL_ARG7_GREG)
2846 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2847 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2848 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2849# endif
2850# endif
2851# endif
2852#endif
2853};
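/* Example: g_aidxIemNativeCallRegs[2] is the host register carrying the third
   call argument, while g_afIemNativeCallRegs[3] is the combined mask of the
   registers used by the first three arguments. */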
2854
2855#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2856/**
2857 * BP offset of the stack argument slots.
2858 *
2859 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2860 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2861 */
2862DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2863{
2864 IEMNATIVE_FP_OFF_STACK_ARG0,
2865# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2866 IEMNATIVE_FP_OFF_STACK_ARG1,
2867# endif
2868# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2869 IEMNATIVE_FP_OFF_STACK_ARG2,
2870# endif
2871# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2872 IEMNATIVE_FP_OFF_STACK_ARG3,
2873# endif
2874};
2875AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2876#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2877
2878/**
2879 * Info about shadowed guest register values.
2880 * @see IEMNATIVEGSTREG
2881 */
2882DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2883{
2884#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2885 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2886 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2887 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2888 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2889 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2890 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2891 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2892 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2893 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2894 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2895 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2896 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2897 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2898 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2899 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2900 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2901 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2902 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2903 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2904 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2905 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2906 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2907 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2908 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2909 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2910 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2911 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2912 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2913 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2914 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2915 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2916 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2917 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2918 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2919 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2920 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2921 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2922 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2923 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2924 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2925 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2926 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2927 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2928 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2929 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2930 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2931 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2932 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2933#undef CPUMCTX_OFF_AND_SIZE
2934};
2935AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
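/* Each IEMNATIVEGSTREG value indexes g_aGstShadowInfo; the off/cb pair gives
   the VMCPU offset and width used when flushing a shadowed value back to the
   guest context (used e.g. by iemNativeEmitStoreGprWithGstShadowReg below). */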
2936
2937
2938/** Host CPU general purpose register names. */
2939DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2940{
2941#ifdef RT_ARCH_AMD64
2942 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2943#elif RT_ARCH_ARM64
2944 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2945 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2946#else
2947# error "port me"
2948#endif
2949};
2950
2951
2952#if 0 /* unused */
2953/**
2954 * Tries to locate a suitable register in the given register mask.
2955 *
2956 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2957 * failed.
2958 *
2959 * @returns Host register number on success, returns UINT8_MAX on failure.
2960 */
2961static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2962{
2963 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2964 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2965 if (fRegs)
2966 {
2967 /** @todo pick better here: */
2968 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2969
2970 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2971 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2972 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2973 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2974
2975 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2976 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2977 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2978 return idxReg;
2979 }
2980 return UINT8_MAX;
2981}
2982#endif /* unused */
2983
2984#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2985
2986/**
2987 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2988 *
2989 * @returns New code buffer offset; throws VBox status code on failure.
2990 * @param   pReNative   The native recompile state.
2991 * @param off The current code buffer position.
2992 * @param enmGstReg The guest register to store to.
2993 * @param idxHstReg The host register to store from.
2994 */
2995DECL_FORCE_INLINE_THROW(uint32_t)
2996iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
2997{
2998 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2999 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3000
3001 switch (g_aGstShadowInfo[enmGstReg].cb)
3002 {
3003 case sizeof(uint64_t):
3004 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3005 case sizeof(uint32_t):
3006 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3007 case sizeof(uint16_t):
3008 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3009# if 0 /* not present in the table. */
3010 case sizeof(uint8_t):
3011 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3012# endif
3013 default:
3014 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3015 }
3016}
3017
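/*
 * Illustrative usage sketch (not taken from the surrounding code): writing a
 * host register back to the guest context via the g_aGstShadowInfo table.
 * The contiguous kIemNativeGstReg_GprFirst layout and the use of X86_GREG_xAX
 * to pick RAX, as well as the hypothetical idxHstReg, are assumptions here.
 */
# if 0 /* illustrative example only */
IEMNATIVEGSTREG const enmGstRegRax = (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX);
off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstRegRax, idxHstReg);
# endif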
3018
3019/**
3020 * Emits code to flush a pending write of the given guest register,
3021 * version with alternative core state.
3022 *
3023 * @returns New code buffer offset.
3024 * @param pReNative The native recompile state.
3025 * @param off Current code buffer position.
3026 * @param pCore Alternative core state.
3027 * @param enmGstReg The guest register to flush.
3028 */
3029DECL_HIDDEN_THROW(uint32_t)
3030iemNativeRegFlushPendingWriteEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVECORESTATE pCore, IEMNATIVEGSTREG enmGstReg)
3031{
3032 uint8_t const idxHstReg = pCore->aidxGstRegShadows[enmGstReg];
3033
3034 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3035 && enmGstReg <= kIemNativeGstReg_GprLast)
3036 || enmGstReg == kIemNativeGstReg_MxCsr);
3037 Assert( idxHstReg != UINT8_MAX
3038 && pCore->bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3039 Log12(("iemNativeRegFlushPendingWriteEx: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3040 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3041
3042 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3043
3044 pCore->bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3045 return off;
3046}
3047
3048
3049/**
3050 * Emits code to flush a pending write of the given guest register.
3051 *
3052 * @returns New code buffer offset.
3053 * @param pReNative The native recompile state.
3054 * @param off Current code buffer position.
3055 * @param enmGstReg The guest register to flush.
3056 */
3057DECL_HIDDEN_THROW(uint32_t)
3058iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
3059{
3060 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3061
3062 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3063 && enmGstReg <= kIemNativeGstReg_GprLast)
3064 || enmGstReg == kIemNativeGstReg_MxCsr);
3065 Assert( idxHstReg != UINT8_MAX
3066 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3067 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3068 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3069
3070 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3071
3072 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3073 return off;
3074}
3075
3076
3077/**
3078 * Flush the given set of guest registers if marked as dirty.
3079 *
3080 * @returns New code buffer offset.
3081 * @param pReNative The native recompile state.
3082 * @param off Current code buffer position.
3083 * @param fFlushGstReg The guest register set to flush (default is flush everything).
3084 */
3085DECL_HIDDEN_THROW(uint32_t)
3086iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
3087{
3088 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
3089 if (bmGstRegShadowDirty)
3090 {
3091# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3092 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3093 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
3094# endif
3095 do
3096 {
3097 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3098 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3099 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3100 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3101 } while (bmGstRegShadowDirty);
3102 }
3103
3104 return off;
3105}
3106
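/*
 * Illustrative usage sketch (not taken from the surrounding code): flushing
 * only the dirty general purpose register shadows, e.g. ahead of a branch.
 * This assumes kIemNativeGstReg_GprFirst..kIemNativeGstReg_GprLast form a
 * contiguous range, as the asserts in the flush helpers above rely on.
 */
# if 0 /* illustrative example only */
uint64_t const fGprMask = (RT_BIT_64(kIemNativeGstReg_GprLast + 1) - 1)
                        & ~(RT_BIT_64(kIemNativeGstReg_GprFirst) - 1);
off = iemNativeRegFlushDirtyGuest(pReNative, off, fGprMask);
# endif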
3107
3108/**
3109 * Flush all shadowed guest registers marked as dirty for the given host register.
3110 *
3111 * @returns New code buffer offset.
3112 * @param pReNative The native recompile state.
3113 * @param off Current code buffer position.
3114 * @param idxHstReg The host register.
3115 *
3116 * @note This doesn't do any unshadowing of guest registers from the host register.
3117 */
3118DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3119{
3120 /* We need to flush any pending guest register writes this host register shadows. */
3121 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3122 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3123 {
3124# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3125 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3126 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3127# endif
3128 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3129 do
3130 {
3131 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3132 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3133 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3134 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3135 } while (bmGstRegShadowDirty);
3136 }
3137
3138 return off;
3139}
3140
3141#endif /* IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK */
3142
3143
3144/**
3145 * Locate a register, possibly freeing one up.
3146 *
3147 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3148 * failed.
3149 *
3150 * @returns Host register number on success. Returns UINT8_MAX if no registers
3151 * are found; the caller is supposed to deal with this and raise an
3152 * allocation type specific status code (if desired).
3153 *
3154 * @throws VBox status code if we run into trouble spilling a variable or
3155 * recording debug info. Does NOT throw anything if we're out of
3156 * registers, though.
3157 */
3158static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3159 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3160{
3161 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3162 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3163 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3164
3165 /*
3166 * Try a freed register that's shadowing a guest register.
3167 */
3168 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3169 if (fRegs)
3170 {
3171 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3172
3173#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3174 /*
3175 * When we have liveness information, we use it to kick out all shadowed
3176 * guest registers that will not be needed any more in this TB. If we're
3177 * lucky, this may prevent us from ending up here again.
3178 *
3179 * Note! We must consider the previous entry here so we don't free
3180 * anything that the current threaded function requires (current
3181 * entry is produced by the next threaded function).
3182 */
3183 uint32_t const idxCurCall = pReNative->idxCurCall;
3184 if (idxCurCall > 0)
3185 {
3186 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3187
3188# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3189 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3190 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3191 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state. */
3192#else
3193 /* Construct a mask of the registers not in the read or write state.
3194 Note! We could skip writes, if they aren't from us, as this is just
3195 a hack to prevent trashing registers that have just been written
3196 or will be written when we retire the current instruction. */
3197 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3198 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3199 & IEMLIVENESSBIT_MASK;
3200#endif
3201 /* Merge EFLAGS. */
3202 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3203 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3204 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3205 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3206 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3207
3208 /* If it matches any shadowed registers. */
3209 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3210 {
3211#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3212 /* Writeback any dirty shadow registers we are about to unshadow. */
3213 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3214#endif
3215
3216 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3217 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3218 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3219
3220 /* See if we've got any unshadowed registers we can return now. */
3221 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3222 if (fUnshadowedRegs)
3223 {
3224 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3225 return (fPreferVolatile
3226 ? ASMBitFirstSetU32(fUnshadowedRegs)
3227 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3228 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3229 - 1;
3230 }
3231 }
3232 }
3233#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3234
3235 unsigned const idxReg = (fPreferVolatile
3236 ? ASMBitFirstSetU32(fRegs)
3237 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3238 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3239 - 1;
3240
3241 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3242 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3243 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3244 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3245
3246#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3247 /* We need to flush any pending guest register writes this host register shadows. */
3248 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3249#endif
3250
3251 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3252 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3253 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3254 return idxReg;
3255 }
3256
3257 /*
3258 * Try free up a variable that's in a register.
3259 *
3260 * We do two rounds here: first evacuating variables that don't need to be
3261 * saved on the stack, then in the second round moving things to the stack.
3262 */
3263 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3264 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3265 {
3266 uint32_t fVars = pReNative->Core.bmVars;
3267 while (fVars)
3268 {
3269 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3270 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3271#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3272 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
3273 continue;
3274#endif
3275
3276 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3277 && (RT_BIT_32(idxReg) & fRegMask)
3278 && ( iLoop == 0
3279 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3280 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3281 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3282 {
3283 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3284 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3285 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3286 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3287 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3288 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3289#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3290 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3291#endif
3292
3293 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3294 {
3295 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3296 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3297 }
3298
3299 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3300 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3301
3302 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3303 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3304 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3305 return idxReg;
3306 }
3307 fVars &= ~RT_BIT_32(idxVar);
3308 }
3309 }
3310
3311 return UINT8_MAX;
3312}
3313
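/*
 * Illustrative sketch (not taken from the surrounding code) of the shift-and-AND
 * folding used for the EFLAGS liveness bits in iemNativeRegAllocFindFree above:
 * after the three folds, the bit at position i survives only if all seven bits
 * i..i+6 were set, and the caller then keeps just the lowest bit of the group.
 */
#if 0 /* illustrative example only */
static uint64_t iemNativeSketchFoldSevenBits(uint64_t fBits)
{
    fBits &= fBits >> 3; /* 7 candidate bits -> 4 partial ANDs */
    fBits &= fBits >> 2; /* 4 -> 2 */
    fBits &= fBits >> 1; /* 2 -> 1: bit i now requires bits i..i+6 */
    return fBits;
}
#endif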
3314
3315/**
3316 * Reassigns a variable to a different register specified by the caller.
3317 *
3318 * @returns The new code buffer position.
3319 * @param pReNative The native recompile state.
3320 * @param off The current code buffer position.
3321 * @param idxVar The variable index.
3322 * @param idxRegOld The old host register number.
3323 * @param idxRegNew The new host register number.
3324 * @param pszCaller The caller for logging.
3325 */
3326static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3327 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3328{
3329 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3330 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3331#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3332 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3333#endif
3334 RT_NOREF(pszCaller);
3335
3336#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3337 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3338#endif
3339 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3340
3341 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3342#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3343 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3344#endif
3345 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3346 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3347 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3348
3349 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3350 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3351 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3352 if (fGstRegShadows)
3353 {
3354 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3355 | RT_BIT_32(idxRegNew);
3356 while (fGstRegShadows)
3357 {
3358 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3359 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3360
3361 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3362 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3363 }
3364 }
3365
3366 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3367 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3368 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3369 return off;
3370}
3371
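/*
 * Illustrative sketch (not taken from the surrounding code) of the set-bit
 * iteration idiom used when rebinding shadow copies above: ASMBitFirstSetU64
 * returns a 1-based bit position (0 when the mask is empty), hence the -1.
 */
#if 0 /* illustrative example only */
static void iemNativeSketchVisitShadowedGstRegs(uint64_t fGstRegShadows)
{
    while (fGstRegShadows)
    {
        unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
        fGstRegShadows &= ~RT_BIT_64(idxGstReg);
        /* ... update the bookkeeping for idxGstReg here ... */
    }
}
#endif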
3372
3373/**
3374 * Moves a variable to a different register or spills it onto the stack.
3375 *
3376 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3377 * kinds can easily be recreated if needed later.
3378 *
3379 * @returns The new code buffer position.
3380 * @param pReNative The native recompile state.
3381 * @param off The current code buffer position.
3382 * @param idxVar The variable index.
3383 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3384 * call-volatile registers.
3385 */
3386DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3387 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3388{
3389 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3390 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3391 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3392 Assert(!pVar->fRegAcquired);
3393
3394 uint8_t const idxRegOld = pVar->idxReg;
3395 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3396 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3397 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3398 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3399 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3400 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3401 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3402 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3403#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3404 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3405#endif
3406
3407
3408 /** @todo Add statistics on this.*/
3409 /** @todo Implement basic variable liveness analysis (python) so variables
3410 * can be freed immediately once no longer used. This has the potential to
3411 * be trashing registers and stack for dead variables.
3412 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3413
3414 /*
3415 * First try move it to a different register, as that's cheaper.
3416 */
3417 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3418 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3419 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3420 if (fRegs)
3421 {
3422 /* Avoid using shadow registers, if possible. */
3423 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3424 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3425 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3426 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3427 }
3428
3429 /*
3430 * Otherwise we must spill the register onto the stack.
3431 */
3432 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3433 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3434 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3435 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3436
3437 pVar->idxReg = UINT8_MAX;
3438 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3439 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3440 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3441 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3442 return off;
3443}
3444
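/*
 * Illustrative usage sketch (not taken from the surrounding code): rescuing a
 * stack variable from a call-volatile register before emitting a helper call.
 * idxVar is a hypothetical packed variable index owned by the caller.
 */
#if 0 /* illustrative example only */
off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
#endif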
3445
3446/**
3447 * Allocates a temporary host general purpose register.
3448 *
3449 * This may emit code to save register content onto the stack in order to free
3450 * up a register.
3451 *
3452 * @returns The host register number; throws VBox status code on failure,
3453 * so no need to check the return value.
3454 * @param pReNative The native recompile state.
3455 * @param poff Pointer to the variable with the code buffer position.
3456 * This will be updated if we need to move a variable from
3457 * register to stack in order to satisfy the request.
3458 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3459 * registers (@c true, default) or the other way around
3460 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3461 */
3462DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3463{
3464 /*
3465 * Try find a completely unused register, preferably a call-volatile one.
3466 */
3467 uint8_t idxReg;
3468 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3469 & ~pReNative->Core.bmHstRegsWithGstShadow
3470 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3471 if (fRegs)
3472 {
3473 if (fPreferVolatile)
3474 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3475 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3476 else
3477 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3478 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3479 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3480 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3481 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3482 }
3483 else
3484 {
3485 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3486 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3487 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3488 }
3489 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3490}
3491
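/*
 * Illustrative usage sketch (not taken from the surrounding code): the typical
 * allocate / emit / free pattern for a scratch register. The allocator may
 * itself emit spill code, which is why the code buffer position is passed by
 * pointer. The immediate value loaded below is an arbitrary placeholder.
 */
#if 0 /* illustrative example only */
uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off, true /*fPreferVolatile*/);
off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1000));
/* ... emit code using idxTmpReg as scratch ... */
iemNativeRegFreeTmp(pReNative, idxTmpReg);
#endif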
3492
3493/**
3494 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3495 * registers.
3496 *
3497 * @returns The host register number; throws VBox status code on failure,
3498 * so no need to check the return value.
3499 * @param pReNative The native recompile state.
3500 * @param poff Pointer to the variable with the code buffer position.
3501 * This will be updated if we need to move a variable from
3502 * register to stack in order to satisfy the request.
3503 * @param fRegMask Mask of acceptable registers.
3504 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3505 * registers (@c true, default) or the other way around
3506 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3507 */
3508DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3509 bool fPreferVolatile /*= true*/)
3510{
3511 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3512 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3513
3514 /*
3515 * Try find a completely unused register, preferably a call-volatile one.
3516 */
3517 uint8_t idxReg;
3518 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3519 & ~pReNative->Core.bmHstRegsWithGstShadow
3520 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3521 & fRegMask;
3522 if (fRegs)
3523 {
3524 if (fPreferVolatile)
3525 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3526 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3527 else
3528 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3529 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3530 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3531 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3532 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3533 }
3534 else
3535 {
3536 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3537 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3538 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3539 }
3540 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3541}
3542
3543
3544/**
3545 * Allocates a temporary register for loading an immediate value into.
3546 *
3547 * This will emit code to load the immediate, unless there happens to be an
3548 * unused register with the value already loaded.
3549 *
3550 * The caller will not modify the returned register, it must be considered
3551 * read-only. Free using iemNativeRegFreeTmpImm.
3552 *
3553 * @returns The host register number; throws VBox status code on failure, so no
3554 * need to check the return value.
3555 * @param pReNative The native recompile state.
3556 * @param poff Pointer to the variable with the code buffer position.
3557 * @param uImm The immediate value that the register must hold upon
3558 * return.
3559 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3560 * registers (@c true, default) or the other way around
3561 * (@c false).
3562 *
3563 * @note Reusing immediate values has not been implemented yet.
3564 */
3565DECL_HIDDEN_THROW(uint8_t)
3566iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3567{
3568 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3569 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3570 return idxReg;
3571}
3572
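/*
 * Illustrative usage sketch (not taken from the surrounding code): loading a
 * constant into a scratch register that must be treated as read-only and
 * released with the matching iemNativeRegFreeTmpImm call.
 */
#if 0 /* illustrative example only */
uint8_t const idxConstReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff), true /*fPreferVolatile*/);
/* ... emit code comparing against idxConstReg ... */
iemNativeRegFreeTmpImm(pReNative, idxConstReg);
#endif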
3573
3574/**
3575 * Allocates a temporary host general purpose register for keeping a guest
3576 * register value.
3577 *
3578 * Since we may already have a register holding the guest register value,
3579 * code will be emitted to do the loading if that's not the case. Code may also
3580 * be emitted if we have to free up a register to satisfy the request.
3581 *
3582 * @returns The host register number; throws VBox status code on failure, so no
3583 * need to check the return value.
3584 * @param pReNative The native recompile state.
3585 * @param poff Pointer to the variable with the code buffer
3586 * position. This will be updated if we need to move a
3587 * variable from register to stack in order to satisfy
3588 * the request.
3589 * @param enmGstReg The guest register that is to be updated.
3590 * @param enmIntendedUse How the caller will be using the host register.
3591 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3592 * register is okay (default). The ASSUMPTION here is
3593 * that the caller has already flushed all volatile
3594 * registers, so this is only applied if we allocate a
3595 * new register.
3596 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3597 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3598 */
3599DECL_HIDDEN_THROW(uint8_t)
3600iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3601 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3602 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3603{
3604 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3605#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3606 AssertMsg( fSkipLivenessAssert
3607 || pReNative->idxCurCall == 0
3608 || enmGstReg == kIemNativeGstReg_Pc
3609 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3610 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3611 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3612 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3613 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3614 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3615#endif
3616 RT_NOREF(fSkipLivenessAssert);
3617#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3618 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3619#endif
3620 uint32_t const fRegMask = !fNoVolatileRegs
3621 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3622 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3623
3624 /*
3625 * First check if the guest register value is already in a host register.
3626 */
3627 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3628 {
3629 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3630 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3631 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3632 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3633
3634 /* It's not supposed to be allocated... */
3635 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3636 {
3637 /*
3638 * If the register will trash the guest shadow copy, try find a
3639 * completely unused register we can use instead. If that fails,
3640 * we need to disassociate the host reg from the guest reg.
3641 */
3642 /** @todo would be nice to know if preserving the register is in any way helpful. */
3643 /* If the purpose is calculations, try duplicate the register value as
3644 we'll be clobbering the shadow. */
3645 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3646 && ( ~pReNative->Core.bmHstRegs
3647 & ~pReNative->Core.bmHstRegsWithGstShadow
3648 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3649 {
3650 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3651
3652 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3653
3654 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3655 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3656 g_apszIemNativeHstRegNames[idxRegNew]));
3657 idxReg = idxRegNew;
3658 }
3659 /* If the current register matches the restrictions, go ahead and allocate
3660 it for the caller. */
3661 else if (fRegMask & RT_BIT_32(idxReg))
3662 {
3663 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3664 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3665 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3666 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3667 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3668 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3669 else
3670 {
3671 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3672 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3673 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3674 }
3675 }
3676 /* Otherwise, allocate a register that satisfies the caller and transfer
3677 the shadowing if compatible with the intended use. (This basically
3678 means the caller wants a non-volatile register (RSP push/pop scenario).) */
3679 else
3680 {
3681 Assert(fNoVolatileRegs);
3682 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3683 !fNoVolatileRegs
3684 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3685 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3686 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3687 {
3688 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3689 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3690 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3691 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3692 }
3693 else
3694 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3695 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3696 g_apszIemNativeHstRegNames[idxRegNew]));
3697 idxReg = idxRegNew;
3698 }
3699 }
3700 else
3701 {
3702 /*
3703 * Oops. Shadowed guest register already allocated!
3704 *
3705 * Allocate a new register, copy the value and, if updating, the
3706 * guest shadow copy assignment to the new register.
3707 */
3708 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3709 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3710 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3711 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3712
3713 /** @todo share register for readonly access. */
3714 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3715 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3716
3717 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3718 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3719
3720 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3721 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3722 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3723 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3724 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3725 else
3726 {
3727 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3728 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3729 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3730 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3731 }
3732 idxReg = idxRegNew;
3733 }
3734 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3735
3736#ifdef VBOX_STRICT
3737 /* Strict builds: Check that the value is correct. */
3738 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3739#endif
3740
3741#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3742 /** @todo r=aeichner Implement for registers other than GPR as well. */
3743 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3744 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3745 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3746 && enmGstReg <= kIemNativeGstReg_GprLast)
3747 || enmGstReg == kIemNativeGstReg_MxCsr))
3748 {
3749# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3750 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3751 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3752# endif
3753 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3754 }
3755#endif
3756
3757 return idxReg;
3758 }
3759
3760 /*
3761 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3762 */
3763 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3764
3765 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3766 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3767
3768 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3769 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3770 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3771 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3772
3773#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3774 /** @todo r=aeichner Implement for registers other than GPR as well. */
3775 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3776 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3777 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3778 && enmGstReg <= kIemNativeGstReg_GprLast)
3779 || enmGstReg == kIemNativeGstReg_MxCsr))
3780 {
3781# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3782 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3783 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3784# endif
3785 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3786 }
3787#endif
3788
3789 return idxRegNew;
3790}
3791
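/*
 * Illustrative usage sketch (not taken from the surrounding code): fetching a
 * guest GPR for updating. The contiguous kIemNativeGstReg_GprFirst layout and
 * the use of X86_GREG_xCX to pick RCX are assumptions for this sketch; the
 * shadow copy is loaded if needed and marked dirty when delayed writeback is
 * enabled, so no explicit store is emitted here.
 */
#if 0 /* illustrative example only */
uint8_t const idxGstRegRcx = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
                                                             (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
                                                             kIemNativeGstRegUse_ForUpdate);
/* ... emit code modifying idxGstRegRcx ... */
iemNativeRegFreeTmp(pReNative, idxGstRegRcx);
#endif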
3792
3793/**
3794 * Allocates a temporary host general purpose register that already holds the
3795 * given guest register value.
3796 *
3797 * The use case for this function is places where the shadowing state cannot be
3798 * modified due to branching and such. This will fail if we don't have a
3799 * current shadow copy handy or if it's incompatible. The only code that will
3800 * be emitted here is value checking code in strict builds.
3801 *
3802 * The intended use can only be readonly!
3803 *
3804 * @returns The host register number, UINT8_MAX if not present.
3805 * @param pReNative The native recompile state.
3806 * @param poff Pointer to the instruction buffer offset.
3807 * Will be updated in strict builds if a register is
3808 * found.
3809 * @param enmGstReg The guest register that is to be read.
3810 * @note In strict builds, this may throw instruction buffer growth failures.
3811 * Non-strict builds will not throw anything.
3812 * @sa iemNativeRegAllocTmpForGuestReg
3813 */
3814DECL_HIDDEN_THROW(uint8_t)
3815iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3816{
3817 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3818#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3819 AssertMsg( pReNative->idxCurCall == 0
3820 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3821 || enmGstReg == kIemNativeGstReg_Pc,
3822 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3823#endif
3824
3825 /*
3826 * First check if the guest register value is already in a host register.
3827 */
3828 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3829 {
3830 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3831 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3832 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3833 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3834
3835 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3836 {
3837 /*
3838 * We only do readonly use here, so easy compared to the other
3839 * variant of this code.
3840 */
3841 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3842 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3843 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3844 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3845 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3846
3847#ifdef VBOX_STRICT
3848 /* Strict builds: Check that the value is correct. */
3849 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3850#else
3851 RT_NOREF(poff);
3852#endif
3853 return idxReg;
3854 }
3855 }
3856
3857 return UINT8_MAX;
3858}
3859
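/*
 * Illustrative usage sketch (not taken from the surrounding code): peeking at
 * EFLAGS read-only without touching the shadowing state, with a fallback for
 * the case where no shadow copy is currently available.
 */
#if 0 /* illustrative example only */
uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_EFlags);
if (idxEflReg != UINT8_MAX)
{
    /* ... emit code reading idxEflReg ... */
    iemNativeRegFreeTmp(pReNative, idxEflReg);
}
else
{
    /* ... emit code loading the flags from the guest context instead ... */
}
#endif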
3860
3861/**
3862 * Allocates argument registers for a function call.
3863 *
3864 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3865 * need to check the return value.
3866 * @param pReNative The native recompile state.
3867 * @param off The current code buffer offset.
3868 * @param cArgs The number of arguments the function call takes.
3869 */
3870DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3871{
3872 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3873 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3874 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3875 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3876
3877 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3878 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3879 else if (cArgs == 0)
3880 return true;
3881
3882 /*
3883 * Do we get lucky and all registers are free and not shadowing anything?
3884 */
3885 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3886 for (uint32_t i = 0; i < cArgs; i++)
3887 {
3888 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3889 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3890 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3891 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3892 }
3893 /*
3894 * Okay, not lucky so we have to free up the registers.
3895 */
3896 else
3897 for (uint32_t i = 0; i < cArgs; i++)
3898 {
3899 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3900 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3901 {
3902 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3903 {
3904 case kIemNativeWhat_Var:
3905 {
3906 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3907 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3908 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
3909 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3910 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
3911#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3912 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3913#endif
3914
3915 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
3916 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3917 else
3918 {
3919 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3920 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3921 }
3922 break;
3923 }
3924
3925 case kIemNativeWhat_Tmp:
3926 case kIemNativeWhat_Arg:
3927 case kIemNativeWhat_rc:
3928 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3929 default:
3930 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3931 }
3932
3933 }
3934 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3935 {
3936 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3937 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3938 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3939#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3940 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3941#endif
3942 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3943 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3944 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3945 }
3946 else
3947 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3948 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3949 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3950 }
3951 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3952 return true;
3953}
3954
3955
3956DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3957
3958
3959#if 0
3960/**
3961 * Frees a register assignment of any type.
3962 *
3963 * @param pReNative The native recompile state.
3964 * @param idxHstReg The register to free.
3965 *
3966 * @note Does not update variables.
3967 */
3968DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3969{
3970 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3971 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3972 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3973 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3974 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3975 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3976 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3977 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3978 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3979 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3980 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3981 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3982 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3983 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3984
3985 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3986 /* no flushing, right:
3987 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3988 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3989 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3990 */
3991}
3992#endif
3993
3994
3995/**
3996 * Frees a temporary register.
3997 *
3998 * Any shadow copies of guest registers assigned to the host register will not
3999 * be flushed by this operation.
4000 */
4001DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4002{
4003 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4004 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4005 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4006 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4007 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4008}
4009
4010
4011/**
4012 * Frees a temporary immediate register.
4013 *
4014 * It is assumed that the caller has not modified the register, so it still holds
4015 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4016 */
4017DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4018{
4019 iemNativeRegFreeTmp(pReNative, idxHstReg);
4020}
4021
4022
4023/**
4024 * Frees a register assigned to a variable.
4025 *
4026 * The register will be disassociated from the variable.
4027 */
4028DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4029{
4030 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4031 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4032 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4033 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4034 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4035#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4036 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4037#endif
4038
4039 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4040 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4041 if (!fFlushShadows)
4042 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4043 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4044 else
4045 {
4046 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4047 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4048#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4049 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
4050#endif
4051 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4052 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4053 uint64_t fGstRegShadows = fGstRegShadowsOld;
4054 while (fGstRegShadows)
4055 {
4056 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4057 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4058
4059 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4060 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4061 }
4062 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4063 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4064 }
4065}
4066
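/*
 * Illustrative usage sketch (not taken from the surrounding code): releasing
 * the host register backing a variable. idxVar is a hypothetical packed
 * variable index; fFlushShadows=true also forgets any guest shadow copies the
 * register carried, which is wanted when its content is about to be clobbered.
 */
#if 0 /* illustrative example only */
uint8_t const idxHstRegOfVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg;
iemNativeRegFreeVar(pReNative, idxHstRegOfVar, true /*fFlushShadows*/);
#endif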
4067
4068#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4069# if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
4070/** Host CPU SIMD register names. */
4071DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4072{
4073# ifdef RT_ARCH_AMD64
4074 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4075# elif RT_ARCH_ARM64
4076 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4077 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4078# else
4079# error "port me"
4080# endif
4081};
4082# endif
4083
4084
4085/**
4086 * Frees a SIMD register assigned to a variable.
4087 *
4088 * The register will be disassociated from the variable.
4089 */
4090DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4091{
4092 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4093 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4094 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4095 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4096 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4097 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4098
4099 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4100 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4101 if (!fFlushShadows)
4102 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4103 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4104 else
4105 {
4106 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4107 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4108 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4109 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4110 uint64_t fGstRegShadows = fGstRegShadowsOld;
4111 while (fGstRegShadows)
4112 {
4113 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4114 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4115
4116 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4117 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4118 }
4119 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4120 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4121 }
4122}
4123
4124
4125/**
4126 * Reassigns a variable to a different SIMD register specified by the caller.
4127 *
4128 * @returns The new code buffer position.
4129 * @param pReNative The native recompile state.
4130 * @param off The current code buffer position.
4131 * @param idxVar The variable index.
4132 * @param idxRegOld The old host register number.
4133 * @param idxRegNew The new host register number.
4134 * @param pszCaller The caller for logging.
4135 */
4136static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4137 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4138{
4139 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4140 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4141 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4142 RT_NOREF(pszCaller);
4143
4144 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4145 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4146 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4147
4148 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4149 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4150 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4151
4152 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4153 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4155
4156 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4157 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4158 else
4159 {
4160 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4161 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4162 }
4163
4164 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4165 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4166 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
4167 if (fGstRegShadows)
4168 {
4169 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4170 | RT_BIT_32(idxRegNew);
4171 while (fGstRegShadows)
4172 {
4173 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4174 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4175
4176 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4177 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4178 }
4179 }
4180
4181 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4182 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4183 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4184 return off;
4185}
4186
4187
4188/**
4189 * Moves a variable to a different register or spills it onto the stack.
4190 *
4191 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4192 * kinds can easily be recreated if needed later.
4193 *
4194 * @returns The new code buffer position.
4195 * @param pReNative The native recompile state.
4196 * @param off The current code buffer position.
4197 * @param idxVar The variable index.
4198 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4199 * call-volatile registers.
4200 */
4201DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4202 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4203{
4204 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4205 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4206 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4207 Assert(!pVar->fRegAcquired);
4208 Assert(!pVar->fSimdReg);
4209
4210 uint8_t const idxRegOld = pVar->idxReg;
4211 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4212 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4213 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4214 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4215 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4216 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4217 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4218 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4219 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4220 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4221
4222 /** @todo Add statistics on this.*/
4223 /** @todo Implement basic variable liveness analysis (python) so variables
4224 * can be freed immediately once no longer used. This has the potential to
4225 * be trashing registers and stack for dead variables.
4226 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4227
4228 /*
4229 * First try move it to a different register, as that's cheaper.
4230 */
4231 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4232 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4233 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4234 if (fRegs)
4235 {
4236 /* Avoid using shadow registers, if possible. */
4237 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4238 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4239 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4240 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4241 }
4242
4243 /*
4244 * Otherwise we must spill the register onto the stack.
4245 */
4246 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4247 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4248 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4249
4250 if (pVar->cbVar == sizeof(RTUINT128U))
4251 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4252 else
4253 {
4254 Assert(pVar->cbVar == sizeof(RTUINT256U));
4255 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4256 }
4257
4258 pVar->idxReg = UINT8_MAX;
4259 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4260 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4261 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4262 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4263 return off;
4264}
4265
4266
4267/**
4268 * Called right before emitting a call instruction to move anything important
4269 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
4270 * optionally freeing argument variables.
4271 *
4272 * @returns New code buffer offset, UINT32_MAX on failure.
4273 * @param pReNative The native recompile state.
4274 * @param off The code buffer offset.
4275 * @param cArgs The number of arguments the function call takes.
4276 * It is presumed that the host register part of these have
4277 * been allocated as such already and won't need moving,
4278 * just freeing.
4279 * @param fKeepVars Mask of variables that should keep their register
4280 * assignments. Caller must take care to handle these.
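 *
 * @note    Rough usage sketch (illustrative only; idxVarResult and the call
 *          site are hypothetical, not taken from actual callers):
 * @code
 *      // Keep one variable in its SIMD register across the helper call; any
 *      // other call-volatile SIMD registers are moved, spilled or flushed.
 *      uint32_t const fKeepVars = RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarResult));
 *      off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0, fKeepVars); // cArgs must be zero here.
 * @endcode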
4281 */
4282DECL_HIDDEN_THROW(uint32_t)
4283iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4284{
4285 Assert(!cArgs); RT_NOREF(cArgs);
4286
4287 /* fKeepVars will reduce this mask. */
4288 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4289
4290 /*
4291 * Move anything important out of volatile registers.
4292 */
4293 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4294#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4295 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4296#endif
4297 ;
4298
4299 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4300 if (!fSimdRegsToMove)
4301 { /* likely */ }
4302 else
4303 {
4304 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4305 while (fSimdRegsToMove != 0)
4306 {
4307 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4308 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4309
4310 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4311 {
4312 case kIemNativeWhat_Var:
4313 {
4314 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4315 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4316 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4317 Assert(pVar->idxReg == idxSimdReg);
4318 Assert(pVar->fSimdReg);
4319 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4320 {
4321 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4322 idxVar, pVar->enmKind, pVar->idxReg));
4323 if (pVar->enmKind != kIemNativeVarKind_Stack)
4324 pVar->idxReg = UINT8_MAX;
4325 else
4326 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4327 }
4328 else
4329 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4330 continue;
4331 }
4332
4333 case kIemNativeWhat_Arg:
4334 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4335 continue;
4336
4337 case kIemNativeWhat_rc:
4338 case kIemNativeWhat_Tmp:
4339 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4340 continue;
4341
4342 case kIemNativeWhat_FixedReserved:
4343#ifdef RT_ARCH_ARM64
4344 continue; /* On ARM the upper half of the virtual 256-bit register. */
4345#endif
4346
4347 case kIemNativeWhat_FixedTmp:
4348 case kIemNativeWhat_pVCpuFixed:
4349 case kIemNativeWhat_pCtxFixed:
4350 case kIemNativeWhat_PcShadow:
4351 case kIemNativeWhat_Invalid:
4352 case kIemNativeWhat_End:
4353 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4354 }
4355 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4356 }
4357 }
4358
4359 /*
4360 * Do the actual freeing.
4361 */
4362 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4363 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4364 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4365 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4366
4367 /* If there are guest register shadows in any call-volatile register, we
4368 have to clear the corresponding guest register masks for each register. */
4369 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4370 if (fHstSimdRegsWithGstShadow)
4371 {
4372 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4373 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4374 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4375 do
4376 {
4377 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4378 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4379
4380 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4381
4382#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4383 /*
4384 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4385 * to call volatile registers).
4386 */
4387 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4388 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4389 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4390#endif
4391 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4392 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4393
4394 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4395 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4396 } while (fHstSimdRegsWithGstShadow != 0);
4397 }
4398
4399 return off;
4400}
4401#endif
4402
4403
4404/**
4405 * Called right before emitting a call instruction to move anything important
4406 * out of call-volatile registers, free and flush the call-volatile registers,
4407 * optionally freeing argument variables.
4408 *
4409 * @returns New code buffer offset, UINT32_MAX on failure.
4410 * @param pReNative The native recompile state.
4411 * @param off The code buffer offset.
4412 * @param cArgs The number of arguments the function call takes.
4413 * It is presumed that the host register part of these have
4414 * been allocated as such already and won't need moving,
4415 * just freeing.
4416 * @param fKeepVars Mask of variables that should keep their register
4417 * assignments. Caller must take care to handle these.
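 *
 * @note    Rough pre-call sequence sketch (illustrative only; loading the
 *          argument registers and emitting the actual call are left out):
 * @code
 *      // Two register arguments will be set up by the caller afterwards, so
 *      // those argument registers only need freeing, not moving:
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2); // cArgs == 2
 *      // ... load the two argument registers and emit the call itself here ...
 * @endcode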
4418 */
4419DECL_HIDDEN_THROW(uint32_t)
4420iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4421{
4422 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4423
4424 /* fKeepVars will reduce this mask. */
4425 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK;
4426
4427#ifdef RT_ARCH_ARM64
4428AssertCompile(IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK == UINT32_C(0x37fff));
4429#endif
4430
4431 /*
4432 * Move anything important out of volatile registers.
4433 */
4434 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4435 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4436 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK
4437#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4438 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4439#endif
4440 & ~g_afIemNativeCallRegs[cArgs];
4441
4442 fRegsToMove &= pReNative->Core.bmHstRegs;
4443 if (!fRegsToMove)
4444 { /* likely */ }
4445 else
4446 {
4447 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4448 while (fRegsToMove != 0)
4449 {
4450 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4451 fRegsToMove &= ~RT_BIT_32(idxReg);
4452
4453 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4454 {
4455 case kIemNativeWhat_Var:
4456 {
4457 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4458 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4459 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4460 Assert(pVar->idxReg == idxReg);
4461#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4462 Assert(!pVar->fSimdReg);
4463#endif
4464 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4465 {
4466 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4467 idxVar, pVar->enmKind, pVar->idxReg));
4468 if (pVar->enmKind != kIemNativeVarKind_Stack)
4469 pVar->idxReg = UINT8_MAX;
4470 else
4471 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4472 }
4473 else
4474 fRegsToFree &= ~RT_BIT_32(idxReg);
4475 continue;
4476 }
4477
4478 case kIemNativeWhat_Arg:
4479 AssertMsgFailed(("What?!?: %u\n", idxReg));
4480 continue;
4481
4482 case kIemNativeWhat_rc:
4483 case kIemNativeWhat_Tmp:
4484 AssertMsgFailed(("Missing free: %u\n", idxReg));
4485 continue;
4486
4487 case kIemNativeWhat_FixedTmp:
4488 case kIemNativeWhat_pVCpuFixed:
4489 case kIemNativeWhat_pCtxFixed:
4490 case kIemNativeWhat_PcShadow:
4491 case kIemNativeWhat_FixedReserved:
4492 case kIemNativeWhat_Invalid:
4493 case kIemNativeWhat_End:
4494 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4495 }
4496 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4497 }
4498 }
4499
4500 /*
4501 * Do the actual freeing.
4502 */
4503 if (pReNative->Core.bmHstRegs & fRegsToFree)
4504 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4505 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4506 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4507
4508 /* If there are guest register shadows in any call-volatile register, we
4509 have to clear the corresponding guest register masks for each register. */
4510 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4511 if (fHstRegsWithGstShadow)
4512 {
4513 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4514 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
4515 fHstRegsWithGstShadow));
4516 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4517 do
4518 {
4519 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4520 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4521
4522 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4523
4524#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4525 /*
4526 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4527 * to call volatile registers).
4528 */
4529 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4530 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4531 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4532#endif
4533
4534 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4535 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4536 } while (fHstRegsWithGstShadow != 0);
4537 }
4538
4539#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4540 /* Now for the SIMD registers, no argument support for now. */
4541 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4542#endif
4543
4544 return off;
4545}
4546
4547
4548/**
4549 * Flushes a set of guest register shadow copies.
4550 *
4551 * This is usually done after calling a threaded function or a C-implementation
4552 * of an instruction.
4553 *
4554 * @param pReNative The native recompile state.
4555 * @param fGstRegs Set of guest registers to flush.
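 *
 * @note    Rough usage sketch (illustrative; the call site is hypothetical):
 * @code
 *      // After calling a C-implementation anything in the guest context may
 *      // have changed, so drop all shadow copies rather than trust them:
 *      iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
 * @endcode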
4556 */
4557DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4558{
4559 /*
4560 * Reduce the mask by what's currently shadowed
4561 */
4562 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4563 fGstRegs &= bmGstRegShadowsOld;
4564 if (fGstRegs)
4565 {
4566 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4567 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4568 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4569 if (bmGstRegShadowsNew)
4570 {
4571 /*
4572 * Partial.
4573 */
4574 do
4575 {
4576 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4577 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4578 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4579 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4580 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4581#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4582 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4583#endif
4584
4585 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4586 fGstRegs &= ~fInThisHstReg;
4587 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4588 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4589 if (!fGstRegShadowsNew)
4590 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4591 } while (fGstRegs != 0);
4592 }
4593 else
4594 {
4595 /*
4596 * Clear all.
4597 */
4598 do
4599 {
4600 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4601 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4602 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4603 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4604 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4605#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4606 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4607#endif
4608
4609 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4610 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4611 } while (fGstRegs != 0);
4612 pReNative->Core.bmHstRegsWithGstShadow = 0;
4613 }
4614 }
4615}
4616
4617
4618/**
4619 * Flushes guest register shadow copies held by a set of host registers.
4620 *
4621 * This is used with the TLB lookup code for ensuring that we don't carry on
4622 * with any guest shadows in volatile registers, as these will get corrupted by
4623 * a TLB miss.
4624 *
4625 * @param pReNative The native recompile state.
4626 * @param fHstRegs Set of host registers to flush guest shadows for.
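 *
 * @note    Rough usage sketch (illustrative; the TLB lookup context is implied):
 * @code
 *      // A TLB miss helper call will clobber the call-volatile registers, so
 *      // forget any guest shadows they currently hold:
 *      iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 * @endcode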
4627 */
4628DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4629{
4630 /*
4631 * Reduce the mask by what's currently shadowed.
4632 */
4633 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4634 fHstRegs &= bmHstRegsWithGstShadowOld;
4635 if (fHstRegs)
4636 {
4637 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4638 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4639 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4640 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4641 if (bmHstRegsWithGstShadowNew)
4642 {
4643 /*
4644 * Partial (likely).
4645 */
4646 uint64_t fGstShadows = 0;
4647 do
4648 {
4649 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4650 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4651 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4652 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4653#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4654 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4655#endif
4656
4657 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4658 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4659 fHstRegs &= ~RT_BIT_32(idxHstReg);
4660 } while (fHstRegs != 0);
4661 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4662 }
4663 else
4664 {
4665 /*
4666 * Clear all.
4667 */
4668 do
4669 {
4670 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4671 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4672 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4673 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4674#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4675 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4676#endif
4677
4678 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4679 fHstRegs &= ~RT_BIT_32(idxHstReg);
4680 } while (fHstRegs != 0);
4681 pReNative->Core.bmGstRegShadows = 0;
4682 }
4683 }
4684}
4685
4686
4687/**
4688 * Restores guest shadow copies in volatile registers.
4689 *
4690 * This is used after calling a helper function (think TLB miss) to restore the
4691 * register state of volatile registers.
4692 *
4693 * @param pReNative The native recompile state.
4694 * @param off The code buffer offset.
4695 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4696 * be active (allocated) w/o asserting. Hack.
4697 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4698 * iemNativeVarRestoreVolatileRegsPostHlpCall()
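 *
 * @note    Rough usage sketch (illustrative; idxRegRet is hypothetical):
 * @code
 *      // After the TLB-miss helper returns, reload the guest shadows that live
 *      // in volatile registers; the helper's return register may still be
 *      // allocated without tripping the assertion:
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, RT_BIT_32(idxRegRet));
 * @endcode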
4699 */
4700DECL_HIDDEN_THROW(uint32_t)
4701iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4702{
4703 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4704 if (fHstRegs)
4705 {
4706 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4707 do
4708 {
4709 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4710
4711 /* It's not fatal if a register is active holding a variable that
4712 shadows a guest register, ASSUMING all pending guest register
4713 writes were flushed prior to the helper call. However, we'll be
4714 emitting duplicate restores, so it wastes code space. */
4715 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4716 RT_NOREF(fHstRegsActiveShadows);
4717
4718 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4719#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4720 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4721#endif
4722 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4723 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4724 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4725
4726 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4727 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4728
4729 fHstRegs &= ~RT_BIT_32(idxHstReg);
4730 } while (fHstRegs != 0);
4731 }
4732 return off;
4733}
4734
4735
4736
4737
4738/*********************************************************************************************************************************
4739* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4740*********************************************************************************************************************************/
4741#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4742
4743/**
4744 * Info about shadowed guest SIMD register values.
4745 * @see IEMNATIVEGSTSIMDREG
4746 */
4747static struct
4748{
4749 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4750 uint32_t offXmm;
4751 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4752 uint32_t offYmm;
4753 /** Name (for logging). */
4754 const char *pszName;
4755} const g_aGstSimdShadowInfo[] =
4756{
4757#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4758 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4759 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4760 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4761 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4762 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4763 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4764 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4765 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4766 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4767 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4768 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4769 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4770 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4771 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4772 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4773 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4774 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4775#undef CPUMCTX_OFF_AND_SIZE
4776};
4777AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4778
4779
4780/**
4781 * Frees a temporary SIMD register.
4782 *
4783 * Any shadow copies of guest registers assigned to the host register will not
4784 * be flushed by this operation.
4785 */
4786DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4787{
4788 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4789 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4790 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4791 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4792 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4793}
4794
4795
4796/**
4797 * Emits code to flush a pending write of the given guest SIMD register, if any,
 * and clears its dirty state (the guest to host SIMD register association is left untouched).
4798 *
4799 * @returns New code buffer offset.
4800 * @param pReNative The native recompile state.
4801 * @param off Current code buffer position.
4802 * @param enmGstSimdReg The guest SIMD register to flush.
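 *
 * @note    Rough usage sketch (illustrative; the register number is arbitrary):
 * @code
 *      // Write back any dirty halves of the guest ymm1 shadow before emitting
 *      // code that accesses the register directly in CPUMCTX:
 *      off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(1));
 * @endcode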
4803 */
4804DECL_HIDDEN_THROW(uint32_t)
4805iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4806{
4807 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4808
4809 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4810 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4811 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4812 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4813
4814 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4815 {
4816 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4817 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4818 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4819 }
4820
4821 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4822 {
4823 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4824 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4825 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4826 }
4827
4828 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4829 return off;
4830}
4831
4832
4833/**
4834 * Flush the given set of guest SIMD registers if marked as dirty.
4835 *
4836 * @returns New code buffer offset.
4837 * @param pReNative The native recompile state.
4838 * @param off Current code buffer position.
4839 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
4840 */
4841DECL_HIDDEN_THROW(uint32_t)
4842iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4843{
4844 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4845 & fFlushGstSimdReg;
4846 if (bmGstSimdRegShadowDirty)
4847 {
4848# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4849 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4850 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4851# endif
4852
4853 do
4854 {
4855 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4856 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4857 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4858 } while (bmGstSimdRegShadowDirty);
4859 }
4860
4861 return off;
4862}
4863
4864
4865#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4866/**
4867 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
4868 *
4869 * @returns New code buffer offset.
4870 * @param pReNative The native recompile state.
4871 * @param off Current code buffer position.
4872 * @param idxHstSimdReg The host SIMD register.
4873 *
4874 * @note This doesn't do any unshadowing of guest registers from the host register.
4875 */
4876DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
4877{
4878 /* We need to flush any pending guest register writes this host register shadows. */
4879 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4880 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
4881 if (bmGstSimdRegShadowDirty)
4882 {
4883# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4884 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4885 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4886# endif
4887
4888 do
4889 {
4890 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4891 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4892 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4893 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
4894 } while (bmGstSimdRegShadowDirty);
4895 }
4896
4897 return off;
4898}
4899#endif
4900
4901
4902/**
4903 * Locate a register, possibly freeing one up.
4904 *
4905 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4906 * failed.
4907 *
4908 * @returns Host register number on success. Returns UINT8_MAX if no registers
4909 * found, the caller is supposed to deal with this and raise an
4910 * allocation type specific status code (if desired).
4911 *
4912 * @throws VBox status code if we run into trouble spilling a variable or
4913 * recording debug info. Does NOT throw anything if we're out of
4914 * registers, though.
4915 */
4916static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4917 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4918{
4919 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
4920 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4921 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4922
4923 /*
4924 * Try a freed register that's shadowing a guest register.
4925 */
4926 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4927 if (fRegs)
4928 {
4929 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
4930
4931#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4932 /*
4933 * When we have liveness information, we use it to kick out all shadowed
4934 * guest registers that will not be needed any more in this TB. If we're
4935 * lucky, this may prevent us from ending up here again.
4936 *
4937 * Note! We must consider the previous entry here so we don't free
4938 * anything that the current threaded function requires (current
4939 * entry is produced by the next threaded function).
4940 */
4941 uint32_t const idxCurCall = pReNative->idxCurCall;
4942 if (idxCurCall > 0)
4943 {
4944 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4945
4946# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4947 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4948 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4949 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
4950#else
4951 /* Construct a mask of the registers not in the read or write state.
4952 Note! We could skip writes, if they aren't from us, as this is just
4953 a hack to prevent trashing registers that have just been written
4954 or will be written when we retire the current instruction. */
4955 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4956 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4957 & IEMLIVENESSBIT_MASK;
4958#endif
4959 /* If it matches any shadowed registers. */
4960 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4961 {
4962 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
4963 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4964 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4965
4966 /* See if we've got any unshadowed registers we can return now. */
4967 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4968 if (fUnshadowedRegs)
4969 {
4970 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
4971 return (fPreferVolatile
4972 ? ASMBitFirstSetU32(fUnshadowedRegs)
4973 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4974 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4975 - 1;
4976 }
4977 }
4978 }
4979#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4980
4981 unsigned const idxReg = (fPreferVolatile
4982 ? ASMBitFirstSetU32(fRegs)
4983 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4984 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
4985 - 1;
4986
4987 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
4988 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
4989 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4990 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
4991
4992 /* We need to flush any pending guest register writes this host SIMD register shadows. */
4993 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
4994
4995 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4996 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4997 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4998 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4999 return idxReg;
5000 }
5001
5002 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5003
5004 /*
5005 * Try free up a variable that's in a register.
5006 *
5007 * We do two rounds here, first evacuating variables that don't need to be
5008 * saved on the stack, then in the second round we move things to the stack.
5009 */
5010 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5011 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5012 {
5013 uint32_t fVars = pReNative->Core.bmVars;
5014 while (fVars)
5015 {
5016 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5017 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5018 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
5019 { fVars &= ~RT_BIT_32(idxVar); continue; /* Clear the bit so we don't loop forever on it. */ }
5020
5021 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5022 && (RT_BIT_32(idxReg) & fRegMask)
5023 && ( iLoop == 0
5024 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5025 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5026 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5027 {
5028 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5029 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5030 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5031 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5032 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5033 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5034
5035 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5036 {
5037 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5038 *poff = pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U)
 ? iemNativeEmitStoreVecRegByBpU128(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg)
 : iemNativeEmitStoreVecRegByBpU256(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5039 }
5040
5041 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5042 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5043
5044 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5045 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5046 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5047 return idxReg;
5048 }
5049 fVars &= ~RT_BIT_32(idxVar);
5050 }
5051 }
5052
5053 AssertFailed();
5054 return UINT8_MAX;
5055}
5056
5057
5058/**
5059 * Flushes a set of guest SIMD register shadow copies.
5060 *
5061 * This is usually done after calling a threaded function or a C-implementation
5062 * of an instruction.
5063 *
5064 * @param pReNative The native recompile state.
5065 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5066 */
5067DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5068{
5069 /*
5070 * Reduce the mask by what's currently shadowed
5071 */
5072 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5073 fGstSimdRegs &= bmGstSimdRegShadows;
5074 if (fGstSimdRegs)
5075 {
5076 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5077 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5078 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5079 if (bmGstSimdRegShadowsNew)
5080 {
5081 /*
5082 * Partial.
5083 */
5084 do
5085 {
5086 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5087 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5088 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5089 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5090 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5091 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5092
5093 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5094 fGstSimdRegs &= ~fInThisHstReg;
5095 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5096 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5097 if (!fGstRegShadowsNew)
5098 {
5099 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5100 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5101 }
5102 } while (fGstSimdRegs != 0);
5103 }
5104 else
5105 {
5106 /*
5107 * Clear all.
5108 */
5109 do
5110 {
5111 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5112 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5113 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5114 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5115 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5116 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5117
5118 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5119 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5120 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5121 } while (fGstSimdRegs != 0);
5122 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5123 }
5124 }
5125}
5126
5127
5128/**
5129 * Allocates a temporary host SIMD register.
5130 *
5131 * This may emit code to save register content onto the stack in order to free
5132 * up a register.
5133 *
5134 * @returns The host register number; throws VBox status code on failure,
5135 * so no need to check the return value.
5136 * @param pReNative The native recompile state.
5137 * @param poff Pointer to the variable with the code buffer position.
5138 * This will be updated if we need to move a variable from
5139 * register to stack in order to satisfy the request.
5140 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5141 * registers (@c true, default) or the other way around
5142 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
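 *
 * @note    Rough usage sketch (illustrative; the emitter in the middle is
 *          hypothetical):
 * @code
 *      uint8_t const idxSimdRegTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      // ... use idxSimdRegTmp as scratch in the emitted instructions ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdRegTmp);
 * @endcode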
5143 */
5144DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5145{
5146 /*
5147 * Try find a completely unused register, preferably a call-volatile one.
5148 */
5149 uint8_t idxSimdReg;
5150 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5151 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5152 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5153 if (fRegs)
5154 {
5155 if (fPreferVolatile)
5156 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5157 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5158 else
5159 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5160 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5161 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5162 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5163
5164 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5165 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5166 }
5167 else
5168 {
5169 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5170 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5171 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5172 }
5173
5174 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5175 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5176}
5177
5178
5179/**
5180 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5181 * registers.
5182 *
5183 * @returns The host register number; throws VBox status code on failure,
5184 * so no need to check the return value.
5185 * @param pReNative The native recompile state.
5186 * @param poff Pointer to the variable with the code buffer position.
5187 * This will be updated if we need to move a variable from
5188 * register to stack in order to satisfy the request.
5189 * @param fRegMask Mask of acceptable registers.
5190 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5191 * registers (@c true, default) or the other way around
5192 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
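 *
 * @note    Rough usage sketch (illustrative): allocating a scratch SIMD register
 *          that survives a helper call by excluding the call-volatile set:
 * @code
 *      uint8_t const idxSimdRegSaved = iemNativeSimdRegAllocTmpEx(pReNative, &off,
 *                                                                 IEMNATIVE_HST_SIMD_REG_MASK
 *                                                                 & ~IEMNATIVE_SIMD_REG_FIXED_MASK
 *                                                                 & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK,
 *                                                                 false); // don't prefer volatile registers
 * @endcode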
5193 */
5194DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5195 bool fPreferVolatile /*= true*/)
5196{
5197 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5198 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5199
5200 /*
5201 * Try find a completely unused register, preferably a call-volatile one.
5202 */
5203 uint8_t idxSimdReg;
5204 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5205 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5206 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5207 & fRegMask;
5208 if (fRegs)
5209 {
5210 if (fPreferVolatile)
5211 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5212 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5213 else
5214 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5215 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5216 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5217 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5218
5219 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5220 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5221 }
5222 else
5223 {
5224 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5225 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5226 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5227 }
5228
5229 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5230 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5231}
5232
5233
5234/**
5235 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5236 *
5237 * @param pReNative The native recompile state.
5238 * @param idxHstSimdReg The host SIMD register to update the state for.
5239 * @param enmLoadSz The load size to set.
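 *
 * @note    Illustrative example of the merging behaviour: marking the low and
 *          the high 128-bit half as loaded in two steps leaves the register
 *          flagged as having all 256 bits valid:
 * @code
 *      iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_Low128);
 *      iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_High128);
 *      // enmLoaded is now kIemNativeGstSimdRegLdStSz_256.
 * @endcode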
5240 */
5241DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5242 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5243{
5244 /* Everything valid already? -> nothing to do. */
5245 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5246 return;
5247
5248 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5249 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5250 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5251 {
5252 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5253 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5254 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5255 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5256 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5257 }
5258}
5259
5260
5261static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5262 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5263{
5264 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5265 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5266 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5267 {
5268# ifdef RT_ARCH_ARM64
5269 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5270 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5271# endif
5272
5273 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5274 {
5275 switch (enmLoadSzDst)
5276 {
5277 case kIemNativeGstSimdRegLdStSz_256:
5278 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5279 break;
5280 case kIemNativeGstSimdRegLdStSz_Low128:
5281 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5282 break;
5283 case kIemNativeGstSimdRegLdStSz_High128:
5284 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5285 break;
5286 default:
5287 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5288 }
5289
5290 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5291 }
5292 }
5293 else
5294 {
5295 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5296 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5297 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5298 }
5299
5300 return off;
5301}
5302
5303
5304/**
5305 * Allocates a temporary host SIMD register for keeping a guest
5306 * SIMD register value.
5307 *
5308 * Since we may already have a register holding the guest register value,
5309 * code will be emitted to do the loading if that's not the case. Code may also
5310 * be emitted if we have to free up a register to satisfy the request.
5311 *
5312 * @returns The host register number; throws VBox status code on failure, so no
5313 * need to check the return value.
5314 * @param pReNative The native recompile state.
5315 * @param poff Pointer to the variable with the code buffer
5316 * position. This will be updated if we need to move a
5317 * variable from register to stack in order to satisfy
5318 * the request.
5319 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5320 * @param enmIntendedUse How the caller will be using the host register.
5321 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5322 * register is okay (default). The ASSUMPTION here is
5323 * that the caller has already flushed all volatile
5324 * registers, so this is only applied if we allocate a
5325 * new register.
5326 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
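 *
 * @note    Rough usage sketch (illustrative; the register number and intended
 *          use are arbitrary): fetch the low 128 bits of guest xmm2 into a host
 *          register for updating, leaving the write-back to the dirty tracking:
 * @code
 *      uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                         IEMNATIVEGSTSIMDREG_SIMD(2),
 *                                                                         kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                         kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code modifying the low half of idxSimdReg ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
 * @endcode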
5327 */
5328DECL_HIDDEN_THROW(uint8_t)
5329iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5330 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5331 bool fNoVolatileRegs /*= false*/)
5332{
5333 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5334#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5335 AssertMsg( pReNative->idxCurCall == 0
5336 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5337 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5338 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5339 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5340 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5341 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5342#endif
5343#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5344 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5345#endif
5346 uint32_t const fRegMask = !fNoVolatileRegs
5347 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5348 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5349
5350 /*
5351 * First check if the guest register value is already in a host register.
5352 */
5353 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5354 {
5355 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5356 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5357 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5358 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5359
5360 /* It's not supposed to be allocated... */
5361 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5362 {
5363 /*
5364 * If the register will trash the guest shadow copy, try find a
5365 * completely unused register we can use instead. If that fails,
5366 * we need to disassociate the host reg from the guest reg.
5367 */
5368 /** @todo would be nice to know if preserving the register is in any way helpful. */
5369 /* If the purpose is calculations, try duplicate the register value as
5370 we'll be clobbering the shadow. */
5371 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5372 && ( ~pReNative->Core.bmHstSimdRegs
5373 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5374 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5375 {
5376 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5377
5378 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5379
5380 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5381 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5382 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5383 idxSimdReg = idxRegNew;
5384 }
5385 /* If the current register matches the restrictions, go ahead and allocate
5386 it for the caller. */
5387 else if (fRegMask & RT_BIT_32(idxSimdReg))
5388 {
5389 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5390 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5391 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5392 {
5393 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5394 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5395 else
5396 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5397 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5398 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5399 }
5400 else
5401 {
5402 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5403 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5404 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5405 }
5406 }
5407 /* Otherwise, allocate a register that satisfies the caller and transfer
5408 the shadowing if compatible with the intended use. (This basically
5409 means the caller wants a non-volatile register (RSP push/pop scenario).) */
5410 else
5411 {
5412 Assert(fNoVolatileRegs);
5413 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5414 !fNoVolatileRegs
5415 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5416 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5417 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5418 {
5419 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5420 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transfering %s to %s for guest %s %s\n",
5421 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5422 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5423 }
5424 else
5425 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5426 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5427 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5428 idxSimdReg = idxRegNew;
5429 }
5430 }
5431 else
5432 {
5433 /*
5434 * Oops. Shadowed guest register already allocated!
5435 *
5436 * Allocate a new register, copy the value and, if updating, the
5437 * guest shadow copy assignment to the new register.
5438 */
5439 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5440 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5441 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5442 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5443
5444 /** @todo share register for readonly access. */
5445 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5446 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5447
5448 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5449 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5450 else
5451 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5452
5453 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5454 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5455 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5456 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5457 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5458 else
5459 {
5460 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5461 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5462 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5463 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5464 }
5465 idxSimdReg = idxRegNew;
5466 }
5467 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5468
5469#ifdef VBOX_STRICT
5470 /* Strict builds: Check that the value is correct. */
5471 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5472 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5473#endif
5474
5475 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5476 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5477 {
5478# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5479 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5480 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5481# endif
5482
5483 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5484 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5485 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5486 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5487 else
5488 {
5489 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5490 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5491 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5492 }
5493 }
5494
5495 return idxSimdReg;
5496 }
5497
5498 /*
5499 * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5500 */
5501 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5502
5503 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5504 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5505 else
5506 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5507
5508 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5509 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5510
5511 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5512 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5513 {
5514# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5515 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5516 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5517# endif
5518
5519 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5520 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5521 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5522 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5523 else
5524 {
5525 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5526 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5527 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5528 }
5529 }
5530
5531 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5532 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5533
5534 return idxRegNew;
5535}
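/*
 * Illustrative usage sketch (comment only, nothing here is compiled): a typical
 * SSE/AVX MC block would grab a shadowed host register roughly like below before
 * emitting the actual operation.  The argument order follows the parameters used
 * above; the IEMNATIVEGSTSIMDREG_SIMD() conversion macro and the
 * iemNativeSimdRegFreeTmp() release helper are assumed names here.
 *
 *      uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                            IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
 *                                                                            kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                            kIemNativeGstRegUse_ForUpdate);
 *      ...emit the operation using idxSimdRegDst...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
 */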
5536
5537
5538/**
5539 * Flushes guest SIMD register shadow copies held by a set of host registers.
5540 *
5541 * This is used whenever calling an external helper to ensure that we don't carry on
5542 * with any guest shadows in volatile registers, as the callee is free to clobber those.
5543 *
5544 * @param pReNative The native recompile state.
5545 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5546 */
5547DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5548{
5549 /*
5550 * Reduce the mask by what's currently shadowed.
5551 */
5552 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5553 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5554 if (fHstSimdRegs)
5555 {
5556 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5557 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5558 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5559 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5560 if (bmHstSimdRegsWithGstShadowNew)
5561 {
5562 /*
5563 * Partial (likely).
5564 */
5565 uint64_t fGstShadows = 0;
5566 do
5567 {
5568 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5569 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5570 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5571 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5572 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5573 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5574
5575 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5576 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5577 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5578 } while (fHstSimdRegs != 0);
5579 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5580 }
5581 else
5582 {
5583 /*
5584 * Clear all.
5585 */
5586 do
5587 {
5588 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5589 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5590 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5591 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5592 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5593 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5594
5595 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5596 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5597 } while (fHstSimdRegs != 0);
5598 pReNative->Core.bmGstSimdRegShadows = 0;
5599 }
5600 }
5601}
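/*
 * Minimal usage sketch (comment only): before emitting a call to an external
 * helper the volatile host SIMD registers are flushed so that no guest shadows
 * survive in registers the callee may clobber.  The
 * IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK constant and pfnHelper are assumed
 * placeholders here.
 *
 *      iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 */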
5602#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5603
5604
5605
5606/*********************************************************************************************************************************
5607* Code emitters for flushing pending guest register writes and sanity checks *
5608*********************************************************************************************************************************/
5609
5610#ifdef VBOX_STRICT
5611/**
5612 * Does internal register allocator sanity checks.
5613 */
5614DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5615{
5616 /*
5617 * Iterate host registers building a guest shadowing set.
5618 */
5619 uint64_t bmGstRegShadows = 0;
5620 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5621 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5622 while (bmHstRegsWithGstShadow)
5623 {
5624 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5625 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5626 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5627
5628 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5629 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5630 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5631 bmGstRegShadows |= fThisGstRegShadows;
5632 while (fThisGstRegShadows)
5633 {
5634 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5635 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5636 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5637 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5638 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5639 }
5640 }
5641 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5642 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5643 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5644
5645 /*
5646 * Now the other way around, checking the guest to host index array.
5647 */
5648 bmHstRegsWithGstShadow = 0;
5649 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5650 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5651 while (bmGstRegShadows)
5652 {
5653 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5654 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5655 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5656
5657 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5658 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5659 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5660 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5661 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5662 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5663 }
5664 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5665 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5666 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5667}
5668#endif /* VBOX_STRICT */
5669
5670
5671/**
5672 * Flushes any delayed guest register writes.
5673 *
5674 * This must be called prior to calling CImpl functions and any helpers that use
5675 * the guest state (like raising exceptions) and such.
5676 *
5677 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5678 * the caller if it wishes to do so.
5679 */
5680DECL_HIDDEN_THROW(uint32_t)
5681iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5682{
5683#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5684 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
5685 off = iemNativeEmitPcWriteback(pReNative, off);
5686#else
5687 RT_NOREF(pReNative, fGstShwExcept);
5688#endif
5689
5690#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5691 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5692#endif
5693
5694#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5695 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5696#endif
5697
5698 return off;
5699}
5700
5701#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5702
5703# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5704
5705/**
5706 * Checks if the value in @a idxPcReg matches IEMCPU::uPcUpdatingDebug.
5707 */
5708DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheckWithReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxPcReg)
5709{
5710 Assert(idxPcReg != IEMNATIVE_REG_FIXED_TMP0);
5711 Assert(pReNative->Core.fDebugPcInitialized);
5712
5713 /* cmp [pVCpu->iem.s.uPcUpdatingDebug], pcreg */
5714# ifdef RT_ARCH_AMD64
5715 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5716 pCodeBuf[off++] = X86_OP_REX_W | (idxPcReg >= 8 ? X86_OP_REX_R : 0);
5717 pCodeBuf[off++] = 0x3b;
5718 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxPcReg & 7, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5719# else
5720 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5721 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5722 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, idxPcReg);
5723# endif
5724
5725 uint32_t offFixup = off;
5726 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, kIemNativeInstrCond_e);
5727 off = iemNativeEmitBrkEx(pCodeBuf, off, UINT32_C(0x2200));
5728 iemNativeFixupFixedJump(pReNative, offFixup, off);
5729
5730 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5731 return off;
5732}
5733
5734
5735/**
5736 * Checks that the current RIP+offPc matches IEMCPU::uPcUpdatingDebug.
5737 */
5738DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5739{
5740 if (pReNative->Core.fDebugPcInitialized)
5741 {
5742 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc);
5743 if (pReNative->Core.offPc)
5744 {
5745 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5746 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 8);
5747 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxTmpReg, idxPcReg, pReNative->Core.offPc);
5748 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5749 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxTmpReg);
5750 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5751 }
5752 else
5753 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
5754 iemNativeRegFreeTmp(pReNative, idxPcReg);
5755 }
5756 return off;
5757}
5758
5759# endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG */
5760
5761/**
5762 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
5763 */
5764DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5765{
5766 Assert(pReNative->Core.offPc);
5767# if !defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && !defined(VBOX_WITH_STATISTICS)
5768 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x\n", pReNative->Core.offPc, off));
5769# else
5770 uint8_t const idxOldInstrPlusOne = pReNative->Core.idxInstrPlusOneOfLastPcUpdate;
5771 uint8_t idxCurCall = pReNative->idxCurCall;
5772 uint8_t idxInstr = pReNative->pTbOrg->Thrd.paCalls[idxCurCall].idxInstr; /* unreliable */
5773 while (idxInstr == 0 && idxInstr + 1 < idxOldInstrPlusOne && idxCurCall > 0)
5774 idxInstr = pReNative->pTbOrg->Thrd.paCalls[--idxCurCall].idxInstr;
5775 uint8_t const cInstrsSkipped = idxInstr <= pReNative->Core.idxInstrPlusOneOfLastPcUpdate ? 0
5776 : idxInstr - pReNative->Core.idxInstrPlusOneOfLastPcUpdate;
5777 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u\n",
5778 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped));
5779
5780 pReNative->Core.idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, pReNative->Core.idxInstrPlusOneOfLastPcUpdate);
5781 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
5782
5783# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5784 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5785 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
5786# endif
5787# endif
5788
5789# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5790 /* Allocate a temporary PC register. */
5791 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5792
5793 /* Perform the addition and store the result. */
5794 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5795 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5796# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5797 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
5798# endif
5799
5800 /* Free but don't flush the PC register. */
5801 iemNativeRegFreeTmp(pReNative, idxPcReg);
5802# else
5803 /* Compare the shadow with the context value, they should match. */
5804 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5805 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5806# endif
5807
5808 pReNative->Core.offPc = 0;
5809
5810 return off;
5811}
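/*
 * For reference, the sequence emitted above roughly corresponds to the guest
 * state update below, with offPc being a compile-time constant; the recompiler
 * bookkeeping (Core.offPc = 0) happens at recompile time, not in emitted code.
 *
 *      pVCpu->cpum.GstCtx.rip += pReNative->Core.offPc;
 */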
5812
5813#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5814
5815
5816/*********************************************************************************************************************************
5817* Code Emitters (larger snippets) *
5818*********************************************************************************************************************************/
5819
5820/**
5821 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5822 * extending to 64-bit width.
5823 *
5824 * @returns New code buffer offset; throws VBox status code on error.
5825 * @param pReNative The native recompile state.
5826 * @param off The current code buffer position.
5827 * @param idxHstReg The host register to load the guest register value into.
5828 * @param enmGstReg The guest register to load.
5829 *
5830 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5831 * that is something the caller needs to do if applicable.
5832 */
5833DECL_HIDDEN_THROW(uint32_t)
5834iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5835{
5836 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5837 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5838
5839 switch (g_aGstShadowInfo[enmGstReg].cb)
5840 {
5841 case sizeof(uint64_t):
5842 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5843 case sizeof(uint32_t):
5844 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5845 case sizeof(uint16_t):
5846 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5847#if 0 /* not present in the table. */
5848 case sizeof(uint8_t):
5849 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5850#endif
5851 default:
5852 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5853 }
5854}
5855
5856
5857#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5858/**
5859 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5860 *
5861 * @returns New code buffer offset; throws VBox status code on error.
5862 * @param pReNative The recompiler state.
5863 * @param off The current code buffer position.
5864 * @param idxHstSimdReg The host register to load the guest register value into.
5865 * @param enmGstSimdReg The guest register to load.
5866 * @param enmLoadSz The load size of the register.
5867 *
5868 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5869 * that is something the caller needs to do if applicable.
5870 */
5871DECL_HIDDEN_THROW(uint32_t)
5872iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5873 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5874{
5875 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5876
5877 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5878 switch (enmLoadSz)
5879 {
5880 case kIemNativeGstSimdRegLdStSz_256:
5881 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5882 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5883 case kIemNativeGstSimdRegLdStSz_Low128:
5884 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5885 case kIemNativeGstSimdRegLdStSz_High128:
5886 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5887 default:
5888 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5889 }
5890}
5891#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5892
5893#ifdef VBOX_STRICT
5894
5895/**
5896 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5897 *
5898 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5899 * Trashes EFLAGS on AMD64.
5900 */
5901DECL_HIDDEN_THROW(uint32_t)
5902iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5903{
5904# ifdef RT_ARCH_AMD64
5905 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5906
5907 /* rol reg64, 32 */
5908 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5909 pbCodeBuf[off++] = 0xc1;
5910 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5911 pbCodeBuf[off++] = 32;
5912
5913 /* test reg32, ffffffffh */
5914 if (idxReg >= 8)
5915 pbCodeBuf[off++] = X86_OP_REX_B;
5916 pbCodeBuf[off++] = 0xf7;
5917 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5918 pbCodeBuf[off++] = 0xff;
5919 pbCodeBuf[off++] = 0xff;
5920 pbCodeBuf[off++] = 0xff;
5921 pbCodeBuf[off++] = 0xff;
5922
5923 /* je/jz +1 */
5924 pbCodeBuf[off++] = 0x74;
5925 pbCodeBuf[off++] = 0x01;
5926
5927 /* int3 */
5928 pbCodeBuf[off++] = 0xcc;
5929
5930 /* rol reg64, 32 */
5931 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5932 pbCodeBuf[off++] = 0xc1;
5933 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5934 pbCodeBuf[off++] = 32;
5935
5936# elif defined(RT_ARCH_ARM64)
5937 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5938 /* lsr tmp0, reg64, #32 */
5939 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5940 /* cbz tmp0, +1 */
5941 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5942 /* brk #0x1100 */
5943 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5944
5945# else
5946# error "Port me!"
5947# endif
5948 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5949 return off;
5950}
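/*
 * For reference, the emitted instructions roughly implement:
 *
 *      if (uRegValue >> 32)
 *          breakpoint;     // int3 on AMD64, brk #0x1100 on ARM64
 */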
5951
5952
5953/**
5954 * Emitting code that checks that the content of register @a idxReg is the same
5955 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5956 * instruction if that's not the case.
5957 *
5958 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5959 * Trashes EFLAGS on AMD64.
5960 */
5961DECL_HIDDEN_THROW(uint32_t)
5962iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5963{
5964#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5965 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5966 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
5967 return off;
5968#endif
5969
5970# ifdef RT_ARCH_AMD64
5971 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5972
5973 /* cmp reg, [mem] */
5974 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5975 {
5976 if (idxReg >= 8)
5977 pbCodeBuf[off++] = X86_OP_REX_R;
5978 pbCodeBuf[off++] = 0x38;
5979 }
5980 else
5981 {
5982 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5983 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5984 else
5985 {
5986 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5987 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5988 else
5989 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5990 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5991 if (idxReg >= 8)
5992 pbCodeBuf[off++] = X86_OP_REX_R;
5993 }
5994 pbCodeBuf[off++] = 0x39;
5995 }
5996 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5997
5998 /* je/jz +1 */
5999 pbCodeBuf[off++] = 0x74;
6000 pbCodeBuf[off++] = 0x01;
6001
6002 /* int3 */
6003 pbCodeBuf[off++] = 0xcc;
6004
6005 /* For values smaller than the register size, we must check that the rest
6006 of the register is all zeros. */
6007 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6008 {
6009 /* test reg64, imm32 */
6010 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6011 pbCodeBuf[off++] = 0xf7;
6012 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6013 pbCodeBuf[off++] = 0;
6014 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6015 pbCodeBuf[off++] = 0xff;
6016 pbCodeBuf[off++] = 0xff;
6017
6018 /* je/jz +1 */
6019 pbCodeBuf[off++] = 0x74;
6020 pbCodeBuf[off++] = 0x01;
6021
6022 /* int3 */
6023 pbCodeBuf[off++] = 0xcc;
6024 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6025 }
6026 else
6027 {
6028 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6029 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6030 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6031 }
6032
6033# elif defined(RT_ARCH_ARM64)
6034 /* mov TMP0, [gstreg] */
6035 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6036
6037 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6038 /* sub tmp0, tmp0, idxReg */
6039 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6040 /* cbz tmp0, +1 */
6041 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6042 /* brk #0x1000+enmGstReg */
6043 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6044 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6045
6046# else
6047# error "Port me!"
6048# endif
6049 return off;
6050}
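/*
 * For reference, the emitted checks roughly correspond to the following, with
 * cb = g_aGstShadowInfo[enmGstReg].cb and uHostRegValue being the value in @a idxReg:
 *
 *      if (memcmp(&uHostRegValue, (uint8_t const *)pVCpu + g_aGstShadowInfo[enmGstReg].off, cb) != 0)
 *          breakpoint;
 *      if (cb < sizeof(uint64_t) && (uHostRegValue >> (cb * 8)) != 0)  // remaining upper bits must be clear
 *          breakpoint;
 */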
6051
6052
6053# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6054# ifdef RT_ARCH_AMD64
6055/**
6056 * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
6057 */
6058DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6059{
6060 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6061 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6062 if (idxSimdReg >= 8)
6063 pbCodeBuf[off++] = X86_OP_REX_R;
6064 pbCodeBuf[off++] = 0x0f;
6065 pbCodeBuf[off++] = 0x38;
6066 pbCodeBuf[off++] = 0x29;
6067 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6068
6069 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6070 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6071 pbCodeBuf[off++] = X86_OP_REX_W
6072 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6073 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6074 pbCodeBuf[off++] = 0x0f;
6075 pbCodeBuf[off++] = 0x3a;
6076 pbCodeBuf[off++] = 0x16;
6077 pbCodeBuf[off++] = 0xeb;
6078 pbCodeBuf[off++] = 0x00;
6079
6080 /* cmp tmp0, 0xffffffffffffffff. */
6081 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6082 pbCodeBuf[off++] = 0x83;
6083 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6084 pbCodeBuf[off++] = 0xff;
6085
6086 /* je/jz +1 */
6087 pbCodeBuf[off++] = 0x74;
6088 pbCodeBuf[off++] = 0x01;
6089
6090 /* int3 */
6091 pbCodeBuf[off++] = 0xcc;
6092
6093 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6094 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6095 pbCodeBuf[off++] = X86_OP_REX_W
6096 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6097 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6098 pbCodeBuf[off++] = 0x0f;
6099 pbCodeBuf[off++] = 0x3a;
6100 pbCodeBuf[off++] = 0x16;
6101 pbCodeBuf[off++] = 0xeb;
6102 pbCodeBuf[off++] = 0x01;
6103
6104 /* cmp tmp0, 0xffffffffffffffff. */
6105 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6106 pbCodeBuf[off++] = 0x83;
6107 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6108 pbCodeBuf[off++] = 0xff;
6109
6110 /* je/jz +1 */
6111 pbCodeBuf[off++] = 0x74;
6112 pbCodeBuf[off++] = 0x01;
6113
6114 /* int3 */
6115 pbCodeBuf[off++] = 0xcc;
6116
6117 return off;
6118}
6119# endif
6120
6121
6122/**
6123 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6124 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6125 * instruction if that's not the case.
6126 *
6127 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6128 * Trashes EFLAGS on AMD64.
6129 */
6130DECL_HIDDEN_THROW(uint32_t)
6131iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6132 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6133{
6134 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6135 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6136 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6137 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6138 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6139 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6140 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6141 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6142 return off;
6143
6144# ifdef RT_ARCH_AMD64
6145 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6146 {
6147 /* movdqa vectmp0, idxSimdReg */
6148 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6149
6150 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6151
6152 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6153 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6154 }
6155
6156 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6157 {
6158 /* CPUMCTX stores the high 128 bits separately, so we must repeat the check for the high part. */
6159 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6160
6161 /* vextracti128 vectmp0, idxSimdReg, 1 */
6162 pbCodeBuf[off++] = X86_OP_VEX3;
6163 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6164 | X86_OP_VEX3_BYTE1_X
6165 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6166 | 0x03; /* Opcode map */
6167 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6168 pbCodeBuf[off++] = 0x39;
6169 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6170 pbCodeBuf[off++] = 0x01;
6171
6172 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6173 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6174 }
6175# elif defined(RT_ARCH_ARM64)
6176 /* mov vectmp0, [gstreg] */
6177 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6178
6179 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6180 {
6181 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6182 /* eor vectmp0, vectmp0, idxSimdReg */
6183 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6184 /* uaddlv vectmp0, vectmp0.16B */
6185 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6186 /* umov tmp0, vectmp0.H[0] */
6187 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6188 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6189 /* cbz tmp0, +1 */
6190 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6191 /* brk #0x1000+enmGstReg */
6192 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6193 }
6194
6195 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6196 {
6197 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6198 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6199 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6200 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6201 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6202 /* umov tmp0, (vectmp0 + 1).H[0] */
6203 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6204 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6205 /* cbz tmp0, +1 */
6206 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6207 /* brk #0x1000+enmGstReg */
6208 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6209 }
6210
6211# else
6212# error "Port me!"
6213# endif
6214
6215 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6216 return off;
6217}
6218# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6219
6220
6221/**
6222 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6223 * important bits.
6224 *
6225 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6226 * Trashes EFLAGS on AMD64.
6227 */
6228DECL_HIDDEN_THROW(uint32_t)
6229iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6230{
6231 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6232 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6233 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6234 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6235
6236#ifdef RT_ARCH_AMD64
6237 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6238
6239 /* je/jz +1 */
6240 pbCodeBuf[off++] = 0x74;
6241 pbCodeBuf[off++] = 0x01;
6242
6243 /* int3 */
6244 pbCodeBuf[off++] = 0xcc;
6245
6246# elif defined(RT_ARCH_ARM64)
6247 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6248
6249 /* b.eq +1 */
6250 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6251 /* brk #0x2000 */
6252 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6253
6254# else
6255# error "Port me!"
6256# endif
6257 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6258
6259 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6260 return off;
6261}
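/*
 * For reference, the emitted code roughly corresponds to:
 *
 *      if (   (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
 *          != (fExec & IEMTB_F_KEY_MASK))
 *          breakpoint;     // int3 on AMD64, brk #0x2000 on ARM64
 */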
6262
6263#endif /* VBOX_STRICT */
6264
6265
6266#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6267/**
6268 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6269 */
6270DECL_HIDDEN_THROW(uint32_t)
6271iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6272{
6273 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6274
6275 fEflNeeded &= X86_EFL_STATUS_BITS;
6276 if (fEflNeeded)
6277 {
6278# ifdef RT_ARCH_AMD64
6279 /* test dword [pVCpu + offVCpu], imm32 */
6280 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
6281 if (fEflNeeded <= 0xff)
6282 {
6283 pCodeBuf[off++] = 0xf6;
6284 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6285 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6286 }
6287 else
6288 {
6289 pCodeBuf[off++] = 0xf7;
6290 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6291 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6292 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6293 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6294 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6295 }
6296
6297 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 3, kIemNativeInstrCond_e);
6298 pCodeBuf[off++] = 0xcc;
6299
6300 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6301
6302# else
6303 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6304 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6305 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6306# ifdef RT_ARCH_ARM64
6307 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6308 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6309# else
6310# error "Port me!"
6311# endif
6312 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6313# endif
6314 }
6315 return off;
6316}
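/*
 * For reference, the emitted check roughly corresponds to:
 *
 *      if (pVCpu->iem.s.fSkippingEFlags & fEflNeeded & X86_EFL_STATUS_BITS)
 *          breakpoint;     // int3 on AMD64, brk #0x7777 on ARM64
 */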
6317#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6318
6319
6320/**
6321 * Emits a code for checking the return code of a call and rcPassUp, returning
6322 * from the code if either are non-zero.
6323 */
6324DECL_HIDDEN_THROW(uint32_t)
6325iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6326{
6327#ifdef RT_ARCH_AMD64
6328 /*
6329 * AMD64: eax = call status code.
6330 */
6331
6332 /* edx = rcPassUp */
6333 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6334# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6335 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6336# endif
6337
6338 /* edx = eax | rcPassUp */
6339 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6340 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6341 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6342 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6343
6344 /* Jump to non-zero status return path. */
6345 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6346
6347 /* done. */
6348
6349#elif RT_ARCH_ARM64
6350 /*
6351 * ARM64: w0 = call status code.
6352 */
6353# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6354 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6355# endif
6356 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6357
6358 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6359
6360 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6361
6362 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pu32CodeBuf, off, ARMV8_A64_REG_X4, true /*f64Bit*/,
6363 kIemNativeLabelType_NonZeroRetOrPassUp);
6364
6365#else
6366# error "port me"
6367#endif
6368 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6369 RT_NOREF_PV(idxInstr);
6370 return off;
6371}
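/*
 * For reference, the emitted code roughly corresponds to:
 *
 *      if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)
 *          goto NonZeroRetOrPassUp;    // rc fiddling + return path
 */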
6372
6373
6374/**
6375 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6376 * raising a \#GP(0) if it isn't.
6377 *
6378 * @returns New code buffer offset; throws VBox status code on error.
6379 * @param pReNative The native recompile state.
6380 * @param off The code buffer offset.
6381 * @param idxAddrReg The host register with the address to check.
6382 * @param idxInstr The current instruction.
6383 */
6384DECL_HIDDEN_THROW(uint32_t)
6385iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6386{
6387 /*
6388 * Make sure we don't have any outstanding guest register writes as we may
6389 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6390 */
6391 off = iemNativeRegFlushPendingWrites(pReNative, off);
6392
6393#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6394 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6395#else
6396 RT_NOREF(idxInstr);
6397#endif
6398
6399#ifdef RT_ARCH_AMD64
6400 /*
6401 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6402 * return raisexcpt();
6403 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6404 */
6405 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6406
6407 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6408 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6409 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6410 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6411 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
6412
6413 iemNativeRegFreeTmp(pReNative, iTmpReg);
6414
6415#elif defined(RT_ARCH_ARM64)
6416 /*
6417 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6418 * return raisexcpt();
6419 * ----
6420 * mov x1, 0x800000000000
6421 * add x1, x0, x1
6422 * cmp xzr, x1, lsr 48
6423 * b.ne .Lraisexcpt
6424 */
6425 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6426
6427 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6428 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6429 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6430 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
6431
6432 iemNativeRegFreeTmp(pReNative, iTmpReg);
6433
6434#else
6435# error "Port me"
6436#endif
6437 return off;
6438}
6439
6440
6441/**
6442 * Emits code to check that the content of @a idxAddrReg is within the CS
6443 * segment limit, raising a \#GP(0) if it isn't.
6444 *
6445 * @returns New code buffer offset; throws VBox status code on error.
6446 * @param pReNative The native recompile state.
6447 * @param off The code buffer offset.
6448 * @param idxAddrReg The host register (32-bit) with the address to
6449 * check.
6450 * @param idxInstr The current instruction.
6451 */
6452DECL_HIDDEN_THROW(uint32_t)
6453iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6454 uint8_t idxAddrReg, uint8_t idxInstr)
6455{
6456 /*
6457 * Make sure we don't have any outstanding guest register writes as we may
6458 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6459 */
6460 off = iemNativeRegFlushPendingWrites(pReNative, off);
6461
6462#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6463 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6464#else
6465 RT_NOREF(idxInstr);
6466#endif
6467
6468 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6469 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6470 kIemNativeGstRegUse_ReadOnly);
6471
6472 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6473 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
6474
6475 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6476 return off;
6477}
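/*
 * For reference, the emitted check roughly corresponds to the following, with
 * uCsLimit standing in for the CS segment limit read via the SegLimitFirst
 * guest register shadow:
 *
 *      if ((uint32_t)uAddr > uCsLimit)
 *          raise #GP(0);   // via the RaiseGp0 TB exit
 */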
6478
6479
6480/**
6481 * Emits a call to a CImpl function or something similar.
6482 */
6483DECL_HIDDEN_THROW(uint32_t)
6484iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6485 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6486{
6487 /* Writeback everything. */
6488 off = iemNativeRegFlushPendingWrites(pReNative, off);
6489
6490 /*
6491 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6492 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6493 */
6494 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6495 fGstShwFlush
6496 | RT_BIT_64(kIemNativeGstReg_Pc)
6497 | RT_BIT_64(kIemNativeGstReg_EFlags));
6498 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6499
6500 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6501
6502 /*
6503 * Load the parameters.
6504 */
6505#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6506 /* Special handling of the hidden VBOXSTRICTRC return pointer. */
6507 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6508 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6509 if (cAddParams > 0)
6510 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6511 if (cAddParams > 1)
6512 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6513 if (cAddParams > 2)
6514 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6515 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6516
6517#else
6518 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6519 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6520 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6521 if (cAddParams > 0)
6522 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6523 if (cAddParams > 1)
6524 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6525 if (cAddParams > 2)
6526# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6527 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6528# else
6529 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6530# endif
6531#endif
6532
6533 /*
6534 * Make the call.
6535 */
6536 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6537
6538#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6539 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6540#endif
6541
6542#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6543 pReNative->Core.fDebugPcInitialized = false;
6544 Log4(("fDebugPcInitialized=false cimpl off=%#x (v2)\n", off));
6545#endif
6546
6547 /*
6548 * Check the status code.
6549 */
6550 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6551}
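/*
 * For reference, the generated code boils down to
 *
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, uParam0, uParam1, uParam2);
 *
 * with unused trailing parameters not loaded, followed by the status/rcPassUp
 * check.  On Windows hosts with VBOXSTRICTRC_STRICT_ENABLED the return value
 * travels via a hidden pointer into the frame's shadow area rather than the
 * return register.
 */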
6552
6553
6554/**
6555 * Emits a call to a threaded worker function.
6556 */
6557DECL_HIDDEN_THROW(uint32_t)
6558iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6559{
6560 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6561
6562 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6563 off = iemNativeRegFlushPendingWrites(pReNative, off);
6564
6565 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6566 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6567
6568#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6569 /* The threaded function may throw / long jmp, so set current instruction
6570 number if we're counting. */
6571 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6572#endif
6573
6574 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6575
6576#ifdef RT_ARCH_AMD64
6577 /* Load the parameters and emit the call. */
6578# ifdef RT_OS_WINDOWS
6579# ifndef VBOXSTRICTRC_STRICT_ENABLED
6580 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6581 if (cParams > 0)
6582 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6583 if (cParams > 1)
6584 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6585 if (cParams > 2)
6586 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6587# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6588 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6589 if (cParams > 0)
6590 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6591 if (cParams > 1)
6592 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6593 if (cParams > 2)
6594 {
6595 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6596 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6597 }
6598 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6599# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6600# else
6601 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6602 if (cParams > 0)
6603 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6604 if (cParams > 1)
6605 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6606 if (cParams > 2)
6607 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6608# endif
6609
6610 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6611
6612# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6613 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6614# endif
6615
6616#elif RT_ARCH_ARM64
6617 /*
6618 * ARM64:
6619 */
6620 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6621 if (cParams > 0)
6622 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6623 if (cParams > 1)
6624 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6625 if (cParams > 2)
6626 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6627
6628 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6629
6630#else
6631# error "port me"
6632#endif
6633
6634#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6635 pReNative->Core.fDebugPcInitialized = false;
6636 Log4(("fDebugPcInitialized=false todo off=%#x (v2)\n", off));
6637#endif
6638
6639 /*
6640 * Check the status code.
6641 */
6642 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6643
6644 return off;
6645}
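/*
 * For reference, the generated code is roughly equivalent to
 *
 *      rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu, pCallEntry->auParams[0],
 *                                                                     pCallEntry->auParams[1], pCallEntry->auParams[2]);
 *
 * with unused trailing parameters not loaded, followed by the same
 * status/rcPassUp check as for CImpl calls.
 */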
6646
6647#ifdef VBOX_WITH_STATISTICS
6648
6649/**
6650 * Emits code to update the thread call statistics.
6651 */
6652DECL_INLINE_THROW(uint32_t)
6653iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6654{
6655 /*
6656 * Update threaded function stats.
6657 */
6658 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6659 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6660# if defined(RT_ARCH_ARM64)
6661 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6662 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6663 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6664 iemNativeRegFreeTmp(pReNative, idxTmp1);
6665 iemNativeRegFreeTmp(pReNative, idxTmp2);
6666# else
6667 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6668# endif
6669 return off;
6670}
6671
6672
6673/**
6674 * Emits code to update the TB exit reason statistics.
6675 */
6676DECL_INLINE_THROW(uint32_t)
6677iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6678{
6679 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6680 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6681 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6682 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6683 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6684
6685 return off;
6686}
6687
6688#endif /* VBOX_WITH_STATISTICS */
6689
6690/**
6691 * Worker for iemNativeEmitViaLookupDoOne and iemNativeRecompileAttachExecMemChunkCtx.
6692 */
6693static uint32_t
6694iemNativeEmitCoreViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak, uintptr_t pfnHelper)
6695{
6696 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6697 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6698
6699 /* Jump to ReturnBreak if the return register is NULL. */
6700 off = iemNativeEmitTestIfGprIsZeroAndJmpToFixed(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6701 true /*f64Bit*/, offReturnBreak);
6702
6703 /* Okay, continue executing the next TB. */
6704 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6705 return off;
6706}
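/*
 * For reference, the emitted tail roughly corresponds to:
 *
 *      pfnNextTb = pfnHelper(pVCpu);
 *      if (!pfnNextTb)
 *          goto ReturnBreak;   // offReturnBreak, i.e. the VINF_IEM_REEXEC_BREAK path
 *      jump pfnNextTb;         // tail-jump straight into the next TB
 */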
6707
6708#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6709
6710/**
6711 * Worker for iemNativeEmitReturnBreakViaLookup.
6712 */
6713static uint32_t iemNativeEmitViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak,
6714 IEMNATIVELABELTYPE enmLabel, uintptr_t pfnHelper)
6715{
6716 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
6717 if (idxLabel != UINT32_MAX)
6718 {
6719 iemNativeLabelDefine(pReNative, idxLabel, off);
6720 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, pfnHelper);
6721 }
6722 return off;
6723}
6724
6725
6726/**
6727 * Emits the code at the ReturnBreakViaLookup, ReturnBreakViaLookupWithIrq,
6728 * ReturnBreakViaLookupWithTlb and ReturnBreakViaLookupWithTlbAndIrq labels
6729 * (returns VINF_IEM_REEXEC_BREAK via the ReturnBreak path or jumps to the next TB).
6730 */
6731static uint32_t iemNativeEmitReturnBreakViaLookup(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnBreakLabel)
6732{
6733 uint32_t const offReturnBreak = pReNative->paLabels[idxReturnBreakLabel].off;
6734 Assert(offReturnBreak < off);
6735
6736 /*
6737 * The lookup table index is in IEMNATIVE_CALL_ARG1_GREG for all.
6738 * The GCPhysPc is in IEMNATIVE_CALL_ARG2_GREG for ReturnBreakViaLookupWithPc.
6739 */
6740 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookup,
6741 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/>);
6742 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
6743 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/>);
6744 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
6745 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/>);
6746 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
6747 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/>);
6748 return off;
6749}
6750
6751#endif /* !IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
6752
6753/**
6754 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6755 */
6756static uint32_t iemNativeEmitCoreReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6757{
6758 /* set the return status */
6759 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6760}
6761
6762
6763#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6764/**
6765 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6766 */
6767static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6768{
6769 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6770 if (idxLabel != UINT32_MAX)
6771 {
6772 iemNativeLabelDefine(pReNative, idxLabel, off);
6773 /* set the return status */
6774 off = iemNativeEmitCoreReturnWithFlags(pReNative, off);
6775 /* jump back to the return sequence. */
6776 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6777 }
6778 return off;
6779}
6780#endif
6781
6782
6783/**
6784 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6785 */
6786static uint32_t iemNativeEmitCoreReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6787{
6788 /* set the return status */
6789 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6790}
6791
6792
6793#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6794/**
6795 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6796 */
6797static uint32_t iemNativeEmitReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6798{
6799 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreakFF);
6800 if (idxLabel != UINT32_MAX)
6801 {
6802 iemNativeLabelDefine(pReNative, idxLabel, off);
6803 /* set the return status */
6804 off = iemNativeEmitCoreReturnBreakFF(pReNative, off);
6805 /* jump back to the return sequence. */
6806 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6807 }
6808 return off;
6809}
6810#endif
6811
6812
6813/**
6814 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6815 */
6816static uint32_t iemNativeEmitCoreReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6817{
6818 /* set the return status */
6819 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6820}
6821
6822
6823#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6824/**
6825 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6826 */
6827static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6828{
6829 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6830 if (idxLabel != UINT32_MAX)
6831 {
6832 iemNativeLabelDefine(pReNative, idxLabel, off);
6833 /* set the return status */
6834 off = iemNativeEmitCoreReturnBreak(pReNative, off);
6835 /* jump back to the return sequence. */
6836 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6837 }
6838 return off;
6839}
6840#endif
6841
6842
6843/**
6844 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6845 */
6846static uint32_t iemNativeEmitCoreRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6847{
6848 /*
6849 * Generate the rc + rcPassUp fiddling code.
6850 */
6851 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6852#ifdef RT_ARCH_AMD64
6853# ifdef RT_OS_WINDOWS
6854# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6855 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6856# endif
6857 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6858 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6859# else
6860 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6861 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6862# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6863 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6864# endif
6865# endif
6866# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6867 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6868# endif
6869
6870#else
6871 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6872 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6873 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6874#endif
6875
6876 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6877 return off;
6878}
6879
6880
6881#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6882/**
6883 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6884 */
6885static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6886{
6887 /*
6888 * Generate the rc + rcPassUp fiddling code if needed.
6889 */
6890 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6891 if (idxLabel != UINT32_MAX)
6892 {
6893 iemNativeLabelDefine(pReNative, idxLabel, off);
6894 off = iemNativeEmitCoreRcFiddling(pReNative, off);
6895 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6896 }
6897 return off;
6898}
6899#endif
6900
6901
6902/**
6903 * Emits a standard epilog.
6904 */
6905static uint32_t iemNativeEmitCoreEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6906{
6907 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6908
6909 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6910
6911 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6912 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6913
6914 /*
6915 * Restore registers and return.
6916 */
6917#ifdef RT_ARCH_AMD64
6918 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6919
6920 /* Reposition esp at the r15 restore point. */
6921 pbCodeBuf[off++] = X86_OP_REX_W;
6922 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6923 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6924 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6925
6926 /* Pop non-volatile registers and return */
6927 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6928 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6929 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6930 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6931 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6932 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6933 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6934 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6935# ifdef RT_OS_WINDOWS
6936 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6937 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6938# endif
6939 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6940 pbCodeBuf[off++] = 0xc9; /* leave */
6941 pbCodeBuf[off++] = 0xc3; /* ret */
6942 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6943
6944#elif RT_ARCH_ARM64
6945 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6946
6947 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6948 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6949 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6950 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6951 IEMNATIVE_FRAME_VAR_SIZE / 8);
6952 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6953 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6954 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6955 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6956 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6957 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6958 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6959 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6960 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6961 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6962 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6963 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6964
6965 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6966 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6967 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6968 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6969
6970 /* retab / ret */
6971# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6972 if (1)
6973 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6974 else
6975# endif
6976 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6977
6978#else
6979# error "port me"
6980#endif
6981 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6982
6983 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
6984 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6985
6986 return off;
6987}
6988
6989
6990#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6991/**
6992 * Emits a standard epilog.
6993 */
6994static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6995{
6996 /*
6997 * Define label for common return point.
6998 */
6999 *pidxReturnLabel = UINT32_MAX;
7000 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7001 *pidxReturnLabel = idxReturn;
7002
7003 /*
7004 * Emit the code.
7005 */
7006 return iemNativeEmitCoreEpilog(pReNative, off);
7007}
7008#endif
7009
7010
7011#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
7012/**
7013 * Emits a standard prolog.
7014 */
7015static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7016{
7017#ifdef RT_ARCH_AMD64
7018 /*
7019 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7020 * reserving 64 bytes for stack variables plus 4 non-register argument
7021 * slots. Fixed register assignment: xBX = pVCpu;
7022 *
7023 * Since we always do the same register spilling, we can use the same
7024 * unwind description for all the code.
7025 */
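    /* The resulting frame, from higher to lower addresses, is roughly: return
       address, saved RBP (the new RBP points here), RBX, on Windows also RSI and
       RDI, then R12 thru R15, followed by the area reserved by the SUB further
       down (alignment padding, variable space and stack/shadow argument slots). */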
7026 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7027 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7028 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7029 pbCodeBuf[off++] = 0x8b;
7030 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7031 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7032 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7033# ifdef RT_OS_WINDOWS
7034 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7035 pbCodeBuf[off++] = 0x8b;
7036 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7037 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7038 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7039# else
7040 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7041 pbCodeBuf[off++] = 0x8b;
7042 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7043# endif
7044 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7045 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7046 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7047 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7048 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7049 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7050 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7051 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7052
7053# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7054 /* Save the frame pointer. */
7055 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7056# endif
7057
7058 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7059 X86_GREG_xSP,
7060 IEMNATIVE_FRAME_ALIGN_SIZE
7061 + IEMNATIVE_FRAME_VAR_SIZE
7062 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7063 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7064 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7065 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7066 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7067
7068#elif RT_ARCH_ARM64
7069 /*
7070 * We set up a stack frame exactly like on x86, only we have to push the
7071 * return address ourselves here. We save all non-volatile registers.
7072 */
7073 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7074
7075 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further as we've been unable
7076 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7077 * definitely in the dwarf stepping code, but so far it has been very tedious to figure out whether it's
7078 * in any way conditional, so just emitting this instruction now and hoping for the best... */
7079 /* pacibsp */
7080 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7081# endif
7082
7083 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7084 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7085 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7086 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7087 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7088 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7089 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7090 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7091 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7092 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7093 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7094 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7095 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7096 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7097 /* Save the BP and LR (ret address) registers at the top of the frame. */
7098 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7099 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7100 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7101 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7102 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7103 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7104
7105 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7106 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7107
7108 /* mov r28, r0 */
7109 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7110 /* mov r27, r1 */
7111 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7112
7113# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7114 /* Save the frame pointer. */
7115 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7116 ARMV8_A64_REG_X2);
7117# endif
7118
7119#else
7120# error "port me"
7121#endif
7122 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7123 return off;
7124}
7125#endif
7126
7127
7128/*********************************************************************************************************************************
7129* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7130*********************************************************************************************************************************/
7131
7132/**
7133 * Internal work that allocates a variable with kind set to
7134 * kIemNativeVarKind_Invalid and no current stack allocation.
7135 *
7136 * The kind will either be set by the caller or later when the variable is first
7137 * assigned a value.
7138 *
7139 * @returns Unpacked index.
7140 * @internal
7141 */
7142static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7143{
7144 Assert(cbType > 0 && cbType <= 64);
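    /* Find the lowest index not yet set in the variable allocation bitmap, i.e. the first free variable slot. */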
7145 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7146 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7147 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7148 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7149 pReNative->Core.aVars[idxVar].cbVar = cbType;
7150 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7151 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7152 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7153 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7154 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7155 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7156 pReNative->Core.aVars[idxVar].u.uValue = 0;
7157#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7158 pReNative->Core.aVars[idxVar].fSimdReg = false;
7159#endif
7160 return idxVar;
7161}
7162
7163
7164/**
7165 * Internal work that allocates an argument variable w/o setting enmKind.
7166 *
7167 * @returns Unpacked index.
7168 * @internal
7169 */
7170static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7171{
7172 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7173 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7174 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7175
7176 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7177 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7178 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7179 return idxVar;
7180}
7181
7182
7183/**
7184 * Gets the stack slot for a stack variable, allocating one if necessary.
7185 *
7186 * Calling this function implies that the stack slot will contain a valid
7187 * variable value. The caller deals with any register currently assigned to the
7188 * variable, typically by spilling it into the stack slot.
7189 *
7190 * @returns The stack slot number.
7191 * @param pReNative The recompiler state.
7192 * @param idxVar The variable.
7193 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7194 */
7195DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7196{
7197 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7198 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7199 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7200
7201 /* Already got a slot? */
7202 uint8_t const idxStackSlot = pVar->idxStackSlot;
7203 if (idxStackSlot != UINT8_MAX)
7204 {
7205 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7206 return idxStackSlot;
7207 }
7208
7209 /*
7210 * A single slot is easy to allocate.
7211 * Allocate them from the top end, closest to BP, to reduce the displacement.
7212 */
7213 if (pVar->cbVar <= sizeof(uint64_t))
7214 {
7215 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7216 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7217 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7218 pVar->idxStackSlot = (uint8_t)iSlot;
7219 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7220 return (uint8_t)iSlot;
7221 }
7222
7223 /*
7224 * We need more than one stack slot.
7225 *
7226 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7227 */
7228 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7229 Assert(pVar->cbVar <= 64);
7230 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7231 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
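    /* Example: a 32-byte variable (RTUINT256U) gives fBitAlignMask=3 and fBitAllocMask=0xf,
       i.e. four consecutive slots aligned on a four-slot boundary. */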
7232 uint32_t bmStack = pReNative->Core.bmStack;
7233 while (bmStack != UINT32_MAX)
7234 {
7235 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7236 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7237 iSlot = (iSlot - 1) & ~fBitAlignMask;
7238 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7239 {
7240 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7241 pVar->idxStackSlot = (uint8_t)iSlot;
7242 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7243 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7244 return (uint8_t)iSlot;
7245 }
7246
7247 bmStack |= (fBitAllocMask << iSlot);
7248 }
7249 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7250}
7251
7252
7253/**
7254 * Changes the variable to a stack variable.
7255 *
7256 * Currently this is only possible to do the first time the variable is used;
7257 * switching it later can be implemented but hasn't been done.
7258 *
7259 * @param pReNative The recompiler state.
7260 * @param idxVar The variable.
7261 * @throws VERR_IEM_VAR_IPE_2
7262 */
7263DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7264{
7265 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7266 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7267 if (pVar->enmKind != kIemNativeVarKind_Stack)
7268 {
7269 /* We could in theory transition from immediate to stack as well, but it
7270 would involve the caller doing work storing the value on the stack. So,
7271 till that's required we only allow transition from invalid. */
7272 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7273 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7274 pVar->enmKind = kIemNativeVarKind_Stack;
7275
7276 /* Note! We don't allocate a stack slot here, that's only done when a
7277 slot is actually needed to hold a variable value. */
7278 }
7279}
7280
7281
7282/**
7283 * Sets the variable to a constant (immediate) value.
7284 *
7285 * This does not require stack storage as we know the value and can always
7286 * reload it, unless of course it's referenced.
7287 *
7288 * @param pReNative The recompiler state.
7289 * @param idxVar The variable.
7290 * @param uValue The immediate value.
7291 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7292 */
7293DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7294{
7295 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7296 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7297 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7298 {
7299 /* Only simple transitions for now. */
7300 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7301 pVar->enmKind = kIemNativeVarKind_Immediate;
7302 }
7303 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7304
7305 pVar->u.uValue = uValue;
7306 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7307 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7308 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7309}
7310
7311
7312/**
7313 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7314 *
7315 * This does not require stack storage as we know the value and can always
7316 * reload it. Loading is postponed till needed.
7317 *
7318 * @param pReNative The recompiler state.
7319 * @param idxVar The variable. Unpacked.
7320 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7321 *
7322 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7323 * @internal
7324 */
7325static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7326{
7327 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7328 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7329
7330 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7331 {
7332 /* Only simple transitions for now. */
7333 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7334 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7335 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7336 }
7337 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7338
7339 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7340
7341 /* Update the other variable, ensure it's a stack variable. */
7342 /** @todo handle variables with const values... that'll go boom now. */
7343 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7344 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7345}
7346
7347
7348/**
7349 * Sets the variable to a reference (pointer) to a guest register reference.
7350 *
7351 * This does not require stack storage as we know the value and can always
7352 * reload it. Loading is postponed till needed.
7353 *
7354 * @param pReNative The recompiler state.
7355 * @param idxVar The variable.
7356 * @param enmRegClass The class of guest registers to reference.
7357 * @param idxReg The register within @a enmRegClass to reference.
7358 *
7359 * @throws VERR_IEM_VAR_IPE_2
7360 */
7361DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7362 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7363{
7364 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7365 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7366
7367 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7368 {
7369 /* Only simple transitions for now. */
7370 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7371 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7372 }
7373 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7374
7375 pVar->u.GstRegRef.enmClass = enmRegClass;
7376 pVar->u.GstRegRef.idx = idxReg;
7377}
7378
7379
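/** Allocates an argument variable of size @a cbType for argument number @a iArgNo; the kind is set separately. */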
7380DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7381{
7382 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7383}
7384
7385
7386DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7387{
7388 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7389
7390 /* Since we're using a generic uint64_t value type, we must truncate it if
7391 the variable is smaller, otherwise we may end up with a too large value when
7392 scaling up an imm8 w/ sign-extension.
7393
7394 This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7395 in the bios, bx=1) when running on arm, because clang expects 16-bit
7396 register parameters to have bits 16 and up set to zero. Instead of
7397 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
7398 CF value in the result. */
7399 switch (cbType)
7400 {
7401 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7402 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7403 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7404 }
7405 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7406 return idxVar;
7407}
7408
7409
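/** Allocates an argument variable for argument number @a iArgNo that is a (stack address) reference to the local variable @a idxOtherVar. */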
7410DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7411{
7412 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7413 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7414 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7415 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7416 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7417 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7418
7419 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7420 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7421 return idxArgVar;
7422}
7423
7424
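/** Allocates a local variable of size @a cbType; the kind is decided on first use (see the comment in the body). */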
7425DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7426{
7427 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7428 /* Don't set to stack now, leave that to the first use as for instance
7429 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7430 return idxVar;
7431}
7432
7433
7434DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7435{
7436 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7437
7438 /* Since we're using a generic uint64_t value type, we must truncate it if
7439 the variable is smaller, otherwise we may end up with a too large value when
7440 scaling up an imm8 w/ sign-extension. */
7441 switch (cbType)
7442 {
7443 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7444 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7445 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7446 }
7447 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7448 return idxVar;
7449}
7450
7451
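/** Allocates a new stack variable of size @a cbType and initializes it by copying (and, if needed, truncating) the value of variable @a idxVarOther. */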
7452DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
7453{
7454 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7455 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7456
7457 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
7458 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7459
7460 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7461
7462 /* Truncate the value to this variables size. */
7463 switch (cbType)
7464 {
7465 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7466 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7467 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7468 }
7469
7470 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7471 iemNativeVarRegisterRelease(pReNative, idxVar);
7472 return idxVar;
7473}
7474
7475
7476/**
7477 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7478 * fixed till we call iemNativeVarRegisterRelease.
7479 *
7480 * @returns The host register number.
7481 * @param pReNative The recompiler state.
7482 * @param idxVar The variable.
7483 * @param poff Pointer to the instruction buffer offset.
7484 * In case a register needs to be freed up or the value
7485 * loaded off the stack.
7486 * @param fInitialized Set if the variable must already have been initialized.
7487 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7488 * the case.
7489 * @param idxRegPref Preferred register number or UINT8_MAX.
7490 */
7491DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7492 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7493{
7494 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7495 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7496 Assert(pVar->cbVar <= 8);
7497 Assert(!pVar->fRegAcquired);
7498
7499 uint8_t idxReg = pVar->idxReg;
7500 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7501 {
7502 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7503 && pVar->enmKind < kIemNativeVarKind_End);
7504 pVar->fRegAcquired = true;
7505 return idxReg;
7506 }
7507
7508 /*
7509 * If the kind of variable has not yet been set, default to 'stack'.
7510 */
7511 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7512 && pVar->enmKind < kIemNativeVarKind_End);
7513 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7514 iemNativeVarSetKindToStack(pReNative, idxVar);
7515
7516 /*
7517 * We have to allocate a register for the variable, even if it's a stack one,
7518 * as we don't know if there are modifications being made to it before it's
7519 * finalized (todo: analyze and insert hints about that?).
7520 *
7521 * If we can, we try to get the correct register for argument variables. This
7522 * assumes that most argument variables are fetched as close as possible
7523 * to the actual call, so that there aren't any interfering hidden calls
7524 * (memory accesses, etc.) in between.
7525 *
7526 * If we cannot, or it's a local variable, we make sure no argument registers
7527 * that will be used by this MC block will be allocated here, and we always
7528 * prefer non-volatile registers to avoid needing to spill stuff for internal
7529 * calls.
7530 */
7531 /** @todo Detect too early argument value fetches and warn about hidden
7532 * calls causing less optimal code to be generated in the python script. */
7533
7534 uint8_t const uArgNo = pVar->uArgNo;
7535 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7536 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7537 {
7538 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7539
7540#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7541 /* Writeback any dirty shadow registers we are about to unshadow. */
7542 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7543#endif
7544
7545 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7546 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7547 }
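    /* No usable preferred register (none given or it's already in use): pick a free
       one ourselves, favouring non-volatile registers and avoiding the argument
       registers this MC block will need. */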
7548 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7549 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7550 {
7551 /** @todo there must be a better way for this and boot cArgsX? */
7552 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7553 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7554 & ~pReNative->Core.bmHstRegsWithGstShadow
7555 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7556 & fNotArgsMask;
7557 if (fRegs)
7558 {
7559 /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
7560 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7561 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7562 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7563 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7564 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7565 }
7566 else
7567 {
7568 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7569 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7570 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7571 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7572 }
7573 }
7574 else
7575 {
7576 idxReg = idxRegPref;
7577 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7578 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7579 }
7580 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7581 pVar->idxReg = idxReg;
7582
7583#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7584 pVar->fSimdReg = false;
7585#endif
7586
7587 /*
7588 * Load it off the stack if we've got a stack slot.
7589 */
7590 uint8_t const idxStackSlot = pVar->idxStackSlot;
7591 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7592 {
7593 Assert(fInitialized);
7594 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7595 switch (pVar->cbVar)
7596 {
7597 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7598 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7599 case 3: AssertFailed(); RT_FALL_THRU();
7600 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7601 default: AssertFailed(); RT_FALL_THRU();
7602 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7603 }
7604 }
7605 else
7606 {
7607 Assert(idxStackSlot == UINT8_MAX);
7608 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7609 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7610 else
7611 {
7612 /*
7613 * Convert from immediate to stack/register. This is currently only
7614 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7615 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7616 */
7617 AssertStmt(fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7618 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7619 idxVar, idxReg, pVar->u.uValue));
7620 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7621 pVar->enmKind = kIemNativeVarKind_Stack;
7622 }
7623 }
7624
7625 pVar->fRegAcquired = true;
7626 return idxReg;
7627}
7628
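/*
 * Illustrative usage sketch (not lifted from any particular caller; idxOtherReg
 * stands in for whatever destination register the caller works with): acquire
 * the host register backing the variable (passing true for fInitialized), emit
 * code using it, and then release it so the allocator may reuse or spill it:
 *     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true);
 *     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxOtherReg, idxVarReg);
 *     iemNativeVarRegisterRelease(pReNative, idxVar);
 */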
7629
7630#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7631/**
7632 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7633 * fixed till we call iemNativeVarRegisterRelease.
7634 *
7635 * @returns The host register number.
7636 * @param pReNative The recompiler state.
7637 * @param idxVar The variable.
7638 * @param poff Pointer to the instruction buffer offset.
7639 * In case a register needs to be freed up or the value
7640 * loaded off the stack.
7641 * @param fInitialized Set if the variable must already have been initialized.
7642 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7643 * the case.
7644 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7645 */
7646DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7647 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7648{
7649 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7650 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7651 Assert( pVar->cbVar == sizeof(RTUINT128U)
7652 || pVar->cbVar == sizeof(RTUINT256U));
7653 Assert(!pVar->fRegAcquired);
7654
7655 uint8_t idxReg = pVar->idxReg;
7656 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7657 {
7658 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7659 && pVar->enmKind < kIemNativeVarKind_End);
7660 pVar->fRegAcquired = true;
7661 return idxReg;
7662 }
7663
7664 /*
7665 * If the kind of variable has not yet been set, default to 'stack'.
7666 */
7667 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7668 && pVar->enmKind < kIemNativeVarKind_End);
7669 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7670 iemNativeVarSetKindToStack(pReNative, idxVar);
7671
7672 /*
7673 * We have to allocate a register for the variable, even if it's a stack one,
7674 * as we don't know if there are modifications being made to it before it's
7675 * finalized (todo: analyze and insert hints about that?).
7676 *
7677 * If we can, we try to get the correct register for argument variables. This
7678 * assumes that most argument variables are fetched as close as possible
7679 * to the actual call, so that there aren't any interfering hidden calls
7680 * (memory accesses, etc.) in between.
7681 *
7682 * If we cannot, or it's a local variable, we make sure no argument registers
7683 * that will be used by this MC block will be allocated here, and we always
7684 * prefer non-volatile registers to avoid needing to spill stuff for internal
7685 * calls.
7686 */
7687 /** @todo Detect too early argument value fetches and warn about hidden
7688 * calls causing less optimal code to be generated in the python script. */
7689
7690 uint8_t const uArgNo = pVar->uArgNo;
7691 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7692
7693 /* SIMD is a bit simpler for now because there is no support for arguments. */
7694 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7695 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7696 {
7697 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7698 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7699 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7700 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7701 & fNotArgsMask;
7702 if (fRegs)
7703 {
7704 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7705 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7706 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7707 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7708 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7709 }
7710 else
7711 {
7712 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7713 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7714 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7715 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7716 }
7717 }
7718 else
7719 {
7720 idxReg = idxRegPref;
7721 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7722 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7723 }
7724 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7725
7726 pVar->fSimdReg = true;
7727 pVar->idxReg = idxReg;
7728
7729 /*
7730 * Load it off the stack if we've got a stack slot.
7731 */
7732 uint8_t const idxStackSlot = pVar->idxStackSlot;
7733 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7734 {
7735 Assert(fInitialized);
7736 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7737 switch (pVar->cbVar)
7738 {
7739 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7740 default: AssertFailed(); RT_FALL_THRU();
7741 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7742 }
7743 }
7744 else
7745 {
7746 Assert(idxStackSlot == UINT8_MAX);
7747 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7748 }
7749 pVar->fRegAcquired = true;
7750 return idxReg;
7751}
7752#endif
7753
7754
7755/**
7756 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7757 * guest register.
7758 *
7759 * This function makes sure there is a register for it and sets it to be the
7760 * current shadow copy of @a enmGstReg.
7761 *
7762 * @returns The host register number.
7763 * @param pReNative The recompiler state.
7764 * @param idxVar The variable.
7765 * @param enmGstReg The guest register this variable will be written to
7766 * after this call.
7767 * @param poff Pointer to the instruction buffer offset.
7768 * In case a register needs to be freed up or if the
7769 * variable content needs to be loaded off the stack.
7770 *
7771 * @note We DO NOT expect @a idxVar to be an argument variable,
7772 * because this function can only be used in the commit stage of an
7773 * instruction.
7774 */
7775DECL_HIDDEN_THROW(uint8_t)
7776iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7777{
7778 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7779 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7780 Assert(!pVar->fRegAcquired);
7781 AssertMsgStmt( pVar->cbVar <= 8
7782 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7783 || pVar->enmKind == kIemNativeVarKind_Stack),
7784 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7785 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7786 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7787
7788 /*
7789 * This shouldn't ever be used for arguments, unless it's in a weird else
7790 * branch that doesn't do any calling and even then it's questionable.
7791 *
7792 * However, in case someone writes crazy wrong MC code and does register
7793 * updates before making calls, just use the regular register allocator to
7794 * ensure we get a register suitable for the intended argument number.
7795 */
7796 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7797
7798 /*
7799 * If there is already a register for the variable, we transfer/set the
7800 * guest shadow copy assignment to it.
7801 */
7802 uint8_t idxReg = pVar->idxReg;
7803 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7804 {
7805#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7806 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7807 {
7808# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7809 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7810 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7811# endif
7812 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7813 }
7814#endif
7815
7816 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7817 {
7818 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7819 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7820 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7821 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7822 }
7823 else
7824 {
7825 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7826 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7827 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7828 }
7829 /** @todo figure this one out. We need some way of making sure the register isn't
7830 * modified after this point, just in case we start writing crappy MC code. */
7831 pVar->enmGstReg = enmGstReg;
7832 pVar->fRegAcquired = true;
7833 return idxReg;
7834 }
7835 Assert(pVar->uArgNo == UINT8_MAX);
7836
7837 /*
7838 * Because this is supposed to be the commit stage, we just tag along with the
7839 * temporary register allocator and upgrade the register to a variable register.
7840 */
7841 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7842 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7843 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7844 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7845 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7846 pVar->idxReg = idxReg;
7847
7848 /*
7849 * Now we need to load the register value.
7850 */
7851 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7852 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7853 else
7854 {
7855 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7856 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7857 switch (pVar->cbVar)
7858 {
7859 case sizeof(uint64_t):
7860 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7861 break;
7862 case sizeof(uint32_t):
7863 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7864 break;
7865 case sizeof(uint16_t):
7866 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7867 break;
7868 case sizeof(uint8_t):
7869 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7870 break;
7871 default:
7872 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7873 }
7874 }
7875
7876 pVar->fRegAcquired = true;
7877 return idxReg;
7878}
7879
7880
7881/**
7882 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7883 *
7884 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7885 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7886 * requirement of flushing anything in volatile host registers when making a
7887 * call.
7888 *
7889 * @returns New @a off value.
7890 * @param pReNative The recompiler state.
7891 * @param off The code buffer position.
7892 * @param fHstRegsNotToSave Set of registers not to save & restore.
7893 */
7894DECL_HIDDEN_THROW(uint32_t)
7895iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7896{
7897 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
7898 if (fHstRegs)
7899 {
7900 do
7901 {
7902 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7903 fHstRegs &= ~RT_BIT_32(idxHstReg);
7904
7905 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7906 {
7907 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7908 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7909 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7910 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7911 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7912 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7913 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7914 {
7915 case kIemNativeVarKind_Stack:
7916 {
7917 /* Temporarily spill the variable register. */
7918 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7919 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7920 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7921 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7922 continue;
7923 }
7924
7925 case kIemNativeVarKind_Immediate:
7926 case kIemNativeVarKind_VarRef:
7927 case kIemNativeVarKind_GstRegRef:
7928 /* It is weird to have any of these loaded at this point. */
7929 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7930 continue;
7931
7932 case kIemNativeVarKind_End:
7933 case kIemNativeVarKind_Invalid:
7934 break;
7935 }
7936 AssertFailed();
7937 }
7938 else
7939 {
7940 /*
7941 * Allocate a temporary stack slot and spill the register to it.
7942 */
7943 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7944 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7945 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7946 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7947 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7948 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7949 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7950 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7951 }
7952 } while (fHstRegs);
7953 }
7954#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7955
7956 /*
7957 * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
7958 * which would be more difficult anyway due to them spanning multiple stack slots and having different sizes
7959 * (besides, we only have a limited number of slots at the moment).
7960 *
7961 * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted by
7962 * the callee. This asserts that the registers were written back earlier and are not in the dirty state.
7963 */
7964 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7965
7966 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7967 if (fHstRegs)
7968 {
7969 do
7970 {
7971 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7972 fHstRegs &= ~RT_BIT_32(idxHstReg);
7973
7974 /* Fixed reserved and temporary registers don't need saving. */
7975 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7976 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7977 continue;
7978
7979 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7980
7981 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7982 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7983 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7984 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7985 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7986 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7987 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7988 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7989 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7990 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7991 {
7992 case kIemNativeVarKind_Stack:
7993 {
7994 /* Temporarily spill the variable register. */
7995 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7996 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7997 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7998 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7999 if (cbVar == sizeof(RTUINT128U))
8000 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8001 else
8002 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8003 continue;
8004 }
8005
8006 case kIemNativeVarKind_Immediate:
8007 case kIemNativeVarKind_VarRef:
8008 case kIemNativeVarKind_GstRegRef:
8009 /* It is weird to have any of these loaded at this point. */
8010 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8011 continue;
8012
8013 case kIemNativeVarKind_End:
8014 case kIemNativeVarKind_Invalid:
8015 break;
8016 }
8017 AssertFailed();
8018 } while (fHstRegs);
8019 }
8020#endif
8021 return off;
8022}
8023
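/*
 * Illustrative pairing sketch (pfnSomeHelper is just a placeholder and 0 means
 * no registers are excluded from saving): variables living in volatile host
 * registers are spilled before the helper call and reloaded afterwards by the
 * matching restore function below:
 *     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, 0);
 *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeHelper);
 *     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, 0);
 */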
8024
8025/**
8026 * Emit code to restore volatile registers after a call to a helper.
8027 *
8028 * @returns New @a off value.
8029 * @param pReNative The recompiler state.
8030 * @param off The code buffer position.
8031 * @param fHstRegsNotToSave Set of registers not to save & restore.
8032 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8033 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8034 */
8035DECL_HIDDEN_THROW(uint32_t)
8036iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8037{
8038 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
8039 if (fHstRegs)
8040 {
8041 do
8042 {
8043 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8044 fHstRegs &= ~RT_BIT_32(idxHstReg);
8045
8046 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8047 {
8048 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8049 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8050 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8051 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8052 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8053 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8054 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8055 {
8056 case kIemNativeVarKind_Stack:
8057 {
8058 /* Unspill the variable register. */
8059 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8060 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8061 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8062 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8063 continue;
8064 }
8065
8066 case kIemNativeVarKind_Immediate:
8067 case kIemNativeVarKind_VarRef:
8068 case kIemNativeVarKind_GstRegRef:
8069 /* It is weird to have any of these loaded at this point. */
8070 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8071 continue;
8072
8073 case kIemNativeVarKind_End:
8074 case kIemNativeVarKind_Invalid:
8075 break;
8076 }
8077 AssertFailed();
8078 }
8079 else
8080 {
8081 /*
8082 * Restore from temporary stack slot.
8083 */
8084 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8085 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8086 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8087 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8088
8089 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8090 }
8091 } while (fHstRegs);
8092 }
8093#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8094 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8095 if (fHstRegs)
8096 {
8097 do
8098 {
8099 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8100 fHstRegs &= ~RT_BIT_32(idxHstReg);
8101
8102 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8103 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8104 continue;
8105 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8106
8107 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8108 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8109 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8110 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8111 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8112 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8113 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8114 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8115 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8116 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8117 {
8118 case kIemNativeVarKind_Stack:
8119 {
8120 /* Unspill the variable register. */
8121 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8122 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8123 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8124 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8125
8126 if (cbVar == sizeof(RTUINT128U))
8127 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8128 else
8129 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8130 continue;
8131 }
8132
8133 case kIemNativeVarKind_Immediate:
8134 case kIemNativeVarKind_VarRef:
8135 case kIemNativeVarKind_GstRegRef:
8136 /* It is weird to have any of these loaded at this point. */
8137 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8138 continue;
8139
8140 case kIemNativeVarKind_End:
8141 case kIemNativeVarKind_Invalid:
8142 break;
8143 }
8144 AssertFailed();
8145 } while (fHstRegs);
8146 }
8147#endif
8148 return off;
8149}
8150
8151
8152/**
8153 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8154 *
8155 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8156 *
8157 * ASSUMES that @a idxVar is valid and unpacked.
8158 */
8159DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8160{
8161 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8162 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8163 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8164 {
8165 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8166 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8167 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8168 Assert(cSlots > 0);
8169 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8170 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8171 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8172 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8173 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8174 }
8175 else
8176 Assert(idxStackSlot == UINT8_MAX);
8177}
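
/* Illustrative note (not part of the build): a minimal worked example of the slot
   arithmetic above, assuming a 32-byte variable (RTUINT256U) sitting at
   idxStackSlot = 4:

       cSlots     = (32 + 8 - 1) / 8   = 4
       fAllocMask = RT_BIT_32(4) - 1   = 0x0000000f
       bmStack   &= ~(fAllocMask << 4)   -> clears stack slot bits 4 thru 7

   i.e. exactly the four 8-byte slots previously reserved for the variable are
   returned to the bmStack allocator bitmap. */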
8178
8179
8180/**
8181 * Worker that frees a single variable.
8182 *
8183 * ASSUMES that @a idxVar is valid and unpacked.
8184 */
8185DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8186{
8187 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8188 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8189 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8190
8191 /* Free the host register first if any assigned. */
8192 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8193#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8194 if ( idxHstReg != UINT8_MAX
8195 && pReNative->Core.aVars[idxVar].fSimdReg)
8196 {
8197 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8198 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8199 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8200 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8201 }
8202 else
8203#endif
8204 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8205 {
8206 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8207 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8208 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8209 }
8210
8211 /* Free argument mapping. */
8212 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8213 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8214 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8215
8216 /* Free the stack slots. */
8217 iemNativeVarFreeStackSlots(pReNative, idxVar);
8218
8219 /* Free the actual variable. */
8220 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8221 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8222}
8223
8224
8225/**
8226 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8227 */
8228DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8229{
8230 while (bmVars != 0)
8231 {
8232 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8233 bmVars &= ~RT_BIT_32(idxVar);
8234
8235#if 1 /** @todo optimize by simplifying this later... */
8236 iemNativeVarFreeOneWorker(pReNative, idxVar);
8237#else
8238 /* Only need to free the host register, the rest is done as bulk updates below. */
8239 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8240 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8241 {
8242 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8243 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8244 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8245 }
8246#endif
8247 }
8248#if 0 /** @todo optimize by simplifying this later... */
8249 pReNative->Core.bmVars = 0;
8250 pReNative->Core.bmStack = 0;
8251 pReNative->Core.u64ArgVars = UINT64_MAX;
8252#endif
8253}
8254
8255
8256
8257/*********************************************************************************************************************************
8258* Emitters for IEM_MC_CALL_CIMPL_XXX *
8259*********************************************************************************************************************************/
8260
8261/**
8262 * Emits code to load a reference to the given guest register into @a idxGprDst.
8263 */
8264DECL_HIDDEN_THROW(uint32_t)
8265iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8266 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8267{
8268#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8269    /** @todo If we are ever going to allow referencing the RIP register, we need to update the guest value here. */
8270#endif
8271
8272 /*
8273 * Get the offset relative to the CPUMCTX structure.
8274 */
8275 uint32_t offCpumCtx;
8276 switch (enmClass)
8277 {
8278 case kIemNativeGstRegRef_Gpr:
8279 Assert(idxRegInClass < 16);
8280 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8281 break;
8282
8283        case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
8284 Assert(idxRegInClass < 4);
8285 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8286 break;
8287
8288 case kIemNativeGstRegRef_EFlags:
8289 Assert(idxRegInClass == 0);
8290 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8291 break;
8292
8293 case kIemNativeGstRegRef_MxCsr:
8294 Assert(idxRegInClass == 0);
8295 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8296 break;
8297
8298 case kIemNativeGstRegRef_FpuReg:
8299 Assert(idxRegInClass < 8);
8300 AssertFailed(); /** @todo what kind of indexing? */
8301 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8302 break;
8303
8304 case kIemNativeGstRegRef_MReg:
8305 Assert(idxRegInClass < 8);
8306 AssertFailed(); /** @todo what kind of indexing? */
8307 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8308 break;
8309
8310 case kIemNativeGstRegRef_XReg:
8311 Assert(idxRegInClass < 16);
8312 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8313 break;
8314
8315 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8316 Assert(idxRegInClass == 0);
8317 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8318 break;
8319
8320 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8321 Assert(idxRegInClass == 0);
8322 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8323 break;
8324
8325 default:
8326 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8327 }
8328
8329 /*
8330 * Load the value into the destination register.
8331 */
8332#ifdef RT_ARCH_AMD64
8333 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8334
8335#elif defined(RT_ARCH_ARM64)
8336 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8337 Assert(offCpumCtx < 4096);
8338 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8339
8340#else
8341# error "Port me!"
8342#endif
8343
8344 return off;
8345}
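
/* Illustrative note (not part of the build): for a plain GPR reference, e.g.
   enmClass = kIemNativeGstRegRef_Gpr with idxRegInClass = 3 (RBX), the emitter
   above reduces to a single address calculation, roughly (a sketch; the exact
   encodings come from iemNativeEmitLeaGprByVCpu resp. Armv8A64MkInstrAddSubUImm12):

       AMD64:  lea  <idxGprDst>, [<pVCpu> + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.aGRegs[3])]
       ARM64:  add  <idxGprDst>, x27, #RT_UOFFSETOF(CPUMCTX, aGRegs[3])   ; x27 = IEMNATIVE_REG_FIXED_PCPUMCTX

   so the destination register ends up holding a host pointer into the guest
   context (CPUMCTX) member being referenced. */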
8346
8347
8348/**
8349 * Common code for CIMPL and AIMPL calls.
8350 *
8351 * These are calls that use argument variables and such. They should not be
8352 * confused with internal calls required to implement an MC operation,
8353 * like a TLB load and similar.
8354 *
8355 * Upon return all that is left to do is to load any hidden arguments and
8356 * perform the call. All argument variables are freed.
8357 *
8358 * @returns New code buffer offset; throws VBox status code on error.
8359 * @param pReNative The native recompile state.
8360 * @param off The code buffer offset.
8361 * @param   cArgs               The total number of arguments (includes hidden
8362 * count).
8363 * @param cHiddenArgs The number of hidden arguments. The hidden
8364 * arguments must not have any variable declared for
8365 * them, whereas all the regular arguments must
8366 * (tstIEMCheckMc ensures this).
8367 * @param   fFlushPendingWrites Whether to flush pending writes (default: true).
8368 *                              Pending writes held in call-volatile registers are still flushed even when false.
8369 */
8370DECL_HIDDEN_THROW(uint32_t)
8371iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8372 bool fFlushPendingWrites /*= true*/)
8373{
8374#ifdef VBOX_STRICT
8375 /*
8376 * Assert sanity.
8377 */
8378 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8379 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8380 for (unsigned i = 0; i < cHiddenArgs; i++)
8381 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8382 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8383 {
8384 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8385 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8386 }
8387 iemNativeRegAssertSanity(pReNative);
8388#endif
8389
8390 /* We don't know what the called function makes use of, so flush any pending register writes. */
8391 RT_NOREF(fFlushPendingWrites);
8392#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8393 if (fFlushPendingWrites)
8394#endif
8395 off = iemNativeRegFlushPendingWrites(pReNative, off);
8396
8397 /*
8398 * Before we do anything else, go over variables that are referenced and
8399 * make sure they are not in a register.
8400 */
8401 uint32_t bmVars = pReNative->Core.bmVars;
8402 if (bmVars)
8403 {
8404 do
8405 {
8406 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8407 bmVars &= ~RT_BIT_32(idxVar);
8408
8409 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8410 {
8411 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8412#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8413 if ( idxRegOld != UINT8_MAX
8414 && pReNative->Core.aVars[idxVar].fSimdReg)
8415 {
8416 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8417 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8418
8419 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8420 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8421 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8422 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8423 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8424 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8425 else
8426 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8427
8428 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8429 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8430
8431 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8432 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8433 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8434 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8435 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8436 }
8437 else
8438#endif
8439 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8440 {
8441 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8442 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8443 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8444 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8445 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8446
8447 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8448 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8449 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8450 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8451 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8452 }
8453 }
8454 } while (bmVars != 0);
8455#if 0 //def VBOX_STRICT
8456 iemNativeRegAssertSanity(pReNative);
8457#endif
8458 }
8459
8460 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8461
8462#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8463 /*
8464     * As the very first step, go over the host registers that will be used for arguments
8465     * and make sure they don't shadow anything which needs writing back first.
8466 */
8467 for (uint32_t i = 0; i < cRegArgs; i++)
8468 {
8469 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8470
8471 /* Writeback any dirty guest shadows before using this register. */
8472 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8473 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8474 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8475 }
8476#endif
8477
8478 /*
8479 * First, go over the host registers that will be used for arguments and make
8480 * sure they either hold the desired argument or are free.
8481 */
8482 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8483 {
8484 for (uint32_t i = 0; i < cRegArgs; i++)
8485 {
8486 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8487 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8488 {
8489 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8490 {
8491 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8492 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8493 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8494 Assert(pVar->idxReg == idxArgReg);
8495 uint8_t const uArgNo = pVar->uArgNo;
8496 if (uArgNo == i)
8497                    { /* perfect */ }
8498 /* The variable allocator logic should make sure this is impossible,
8499 except for when the return register is used as a parameter (ARM,
8500 but not x86). */
8501#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8502 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8503 {
8504# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8505# error "Implement this"
8506# endif
8507 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8508 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8509 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8510 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8511 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8512 }
8513#endif
8514 else
8515 {
8516 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8517
8518 if (pVar->enmKind == kIemNativeVarKind_Stack)
8519 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8520 else
8521 {
8522 /* just free it, can be reloaded if used again */
8523 pVar->idxReg = UINT8_MAX;
8524 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8525 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8526 }
8527 }
8528 }
8529 else
8530 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8531 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8532 }
8533 }
8534#if 0 //def VBOX_STRICT
8535 iemNativeRegAssertSanity(pReNative);
8536#endif
8537 }
8538
8539 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8540
8541#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8542 /*
8543 * If there are any stack arguments, make sure they are in their place as well.
8544 *
8545     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8546     * the caller) will be loading it later and it must be free (see first loop).
8547 */
8548 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8549 {
8550 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8551 {
8552 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8553 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8554 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8555 {
8556 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8557 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8558 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8559 pVar->idxReg = UINT8_MAX;
8560 }
8561 else
8562 {
8563 /* Use ARG0 as temp for stuff we need registers for. */
8564 switch (pVar->enmKind)
8565 {
8566 case kIemNativeVarKind_Stack:
8567 {
8568 uint8_t const idxStackSlot = pVar->idxStackSlot;
8569 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8570 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8571 iemNativeStackCalcBpDisp(idxStackSlot));
8572 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8573 continue;
8574 }
8575
8576 case kIemNativeVarKind_Immediate:
8577 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8578 continue;
8579
8580 case kIemNativeVarKind_VarRef:
8581 {
8582 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8583 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8584 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8585 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8586 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8587# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8588 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8589 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8590 if ( fSimdReg
8591 && idxRegOther != UINT8_MAX)
8592 {
8593 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8594 if (cbVar == sizeof(RTUINT128U))
8595 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8596 else
8597 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8598 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8599 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8600 }
8601 else
8602# endif
8603 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8604 {
8605 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8606 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8607 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8608 }
8609 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8610 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8611 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8612 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8613 continue;
8614 }
8615
8616 case kIemNativeVarKind_GstRegRef:
8617 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8618 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8619 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8620 continue;
8621
8622 case kIemNativeVarKind_Invalid:
8623 case kIemNativeVarKind_End:
8624 break;
8625 }
8626 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8627 }
8628 }
8629# if 0 //def VBOX_STRICT
8630 iemNativeRegAssertSanity(pReNative);
8631# endif
8632 }
8633#else
8634 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8635#endif
8636
8637 /*
8638 * Make sure the argument variables are loaded into their respective registers.
8639 *
8640 * We can optimize this by ASSUMING that any register allocations are for
8641     * registers that have already been loaded and are ready. The previous step
8642 * saw to that.
8643 */
8644 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8645 {
8646 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8647 {
8648 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8649 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8650 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8651 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8652 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8653 else
8654 {
8655 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8656 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8657 {
8658 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8659 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8660 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8661 | RT_BIT_32(idxArgReg);
8662 pVar->idxReg = idxArgReg;
8663 }
8664 else
8665 {
8666 /* Use ARG0 as temp for stuff we need registers for. */
8667 switch (pVar->enmKind)
8668 {
8669 case kIemNativeVarKind_Stack:
8670 {
8671 uint8_t const idxStackSlot = pVar->idxStackSlot;
8672 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8673 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8674 continue;
8675 }
8676
8677 case kIemNativeVarKind_Immediate:
8678 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8679 continue;
8680
8681 case kIemNativeVarKind_VarRef:
8682 {
8683 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8684 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8685 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8686 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8687 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8688 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8689#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8690 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8691 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8692 if ( fSimdReg
8693 && idxRegOther != UINT8_MAX)
8694 {
8695 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8696 if (cbVar == sizeof(RTUINT128U))
8697 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8698 else
8699 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8700 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8701 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8702 }
8703 else
8704#endif
8705 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8706 {
8707 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8708 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8709 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8710 }
8711 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8712 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8713 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8714 continue;
8715 }
8716
8717 case kIemNativeVarKind_GstRegRef:
8718 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8719 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8720 continue;
8721
8722 case kIemNativeVarKind_Invalid:
8723 case kIemNativeVarKind_End:
8724 break;
8725 }
8726 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8727 }
8728 }
8729 }
8730#if 0 //def VBOX_STRICT
8731 iemNativeRegAssertSanity(pReNative);
8732#endif
8733 }
8734#ifdef VBOX_STRICT
8735 else
8736 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8737 {
8738 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8739 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8740 }
8741#endif
8742
8743 /*
8744 * Free all argument variables (simplified).
8745 * Their lifetime always expires with the call they are for.
8746 */
8747 /** @todo Make the python script check that arguments aren't used after
8748 * IEM_MC_CALL_XXXX. */
8749    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8750     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8751 * an argument value. There is also some FPU stuff. */
8752 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8753 {
8754 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8755 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8756
8757 /* no need to free registers: */
8758 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8759 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8760 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8761 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8762 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8763 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8764
8765 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8766 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8767 iemNativeVarFreeStackSlots(pReNative, idxVar);
8768 }
8769 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8770
8771 /*
8772 * Flush volatile registers as we make the call.
8773 */
8774 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8775
8776 return off;
8777}
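
/* Illustrative note (not part of the build): a rough sketch of how the
   IEM_MC_CALL_CIMPL_XXX / AIMPL emitters are expected to use the worker above.
   The helper names and the hidden argument layout below are assumptions taken
   from the surrounding code and may differ per call type:

       off = iemNativeEmitCallCommon(pReNative, off, cVisibleArgs + cHiddenArgs, cHiddenArgs);
       // load the hidden arguments, e.g. arg0 = pVCpu for a C-impl call:
       off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
       // ... any further hidden arguments (e.g. cbInstr) ...
       // finally emit the call to the C helper (pfnCImpl here is hypothetical):
       off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);

   iemNativeEmitCallCommon itself only marshals the declared argument variables
   into the calling convention registers / stack slots and frees them; it does
   not emit the call instruction. */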
8778
8779
8780
8781/*********************************************************************************************************************************
8782* TLB Lookup. *
8783*********************************************************************************************************************************/
8784
8785/**
8786 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8787 */
8788DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint64_t uSegAndSizeAndAccessAndDisp)
8789{
8790 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccessAndDisp);
8791 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccessAndDisp);
8792 uint32_t const fAccess = (uint32_t)uSegAndSizeAndAccessAndDisp >> 16;
8793 uint8_t const offDisp = RT_BYTE5(uSegAndSizeAndAccessAndDisp);
8794 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64+%#x LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, offDisp, cbMem, fAccess, uResult));
8795
8796 /* Do the lookup manually. */
8797 RTGCPTR const GCPtrFlat = (iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base) + offDisp;
8798 uint64_t const uTagNoRev = IEMTLB_CALC_TAG_NO_REV(GCPtrFlat);
8799 PCIEMTLBENTRY pTlbe = IEMTLB_TAG_TO_EVEN_ENTRY(&pVCpu->iem.s.DataTlb, uTagNoRev);
8800 if (RT_LIKELY( pTlbe->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevision)
8801 || (pTlbe = pTlbe + 1)->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevisionGlobal)))
8802 {
8803 /*
8804 * Check TLB page table level access flags.
8805 */
8806 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8807 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8808 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8809 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8810 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8811 | IEMTLBE_F_PG_UNASSIGNED
8812 | IEMTLBE_F_PT_NO_ACCESSED
8813 | fNoWriteNoDirty | fNoUser);
8814 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8815 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8816 {
8817 /*
8818 * Return the address.
8819 */
8820 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8821 if ((uintptr_t)pbAddr == uResult)
8822 return;
8823 RT_NOREF(cbMem);
8824 AssertFailed();
8825 }
8826 else
8827 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8828 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8829 }
8830 else
8831 AssertFailed();
8832 RT_BREAKPOINT();
8833}
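
/* Illustrative note (not part of the build): the packed uSegAndSizeAndAccessAndDisp
   parameter decoded above is expected to be assembled by the emitting side roughly
   as follows (a sketch; assumes fAccess fits into 16 bits here):

       uint64_t const uPacked = (uint64_t)iSegReg             //  bits  0..7  (UINT8_MAX = already flat)
                              | ((uint64_t)cbMem   <<  8)     //  bits  8..15
                              | ((uint64_t)fAccess << 16)     //  bits 16..31 (IEM_ACCESS_XXX)
                              | ((uint64_t)offDisp << 32);    //  bits 32..39

   which mirrors the RT_BYTE1 / RT_BYTE2 / ">> 16" / RT_BYTE5 extraction at the top
   of the function. */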
8834
8835/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8836
8837
8838
8839/*********************************************************************************************************************************
8840* Recompiler Core. *
8841*********************************************************************************************************************************/
8842
8843/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8844static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8845{
8846 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8847 pDis->cbCachedInstr += cbMaxRead;
8848 RT_NOREF(cbMinRead);
8849 return VERR_NO_DATA;
8850}
8851
8852
8853DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8854{
8855 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8856 {
8857#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8858 ENTRY(fLocalForcedActions),
8859 ENTRY(iem.s.rcPassUp),
8860 ENTRY(iem.s.fExec),
8861 ENTRY(iem.s.pbInstrBuf),
8862 ENTRY(iem.s.uInstrBufPc),
8863 ENTRY(iem.s.GCPhysInstrBuf),
8864 ENTRY(iem.s.cbInstrBufTotal),
8865 ENTRY(iem.s.idxTbCurInstr),
8866 ENTRY(iem.s.fSkippingEFlags),
8867#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
8868 ENTRY(iem.s.uPcUpdatingDebug),
8869#endif
8870#ifdef VBOX_WITH_STATISTICS
8871 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8872 ENTRY(iem.s.StatNativeTlbHitsForStore),
8873 ENTRY(iem.s.StatNativeTlbHitsForStack),
8874 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8875 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8876 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8877 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8878 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8879#endif
8880 ENTRY(iem.s.DataTlb.uTlbRevision),
8881 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8882 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8883 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8884 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8885 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8886 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8887 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8888 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8889 ENTRY(iem.s.DataTlb.aEntries),
8890 ENTRY(iem.s.CodeTlb.uTlbRevision),
8891 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8892 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8893 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8894 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8895 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8896 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8897 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8898 ENTRY(iem.s.CodeTlb.aEntries),
8899 ENTRY(pVMR3),
8900 ENTRY(cpum.GstCtx.rax),
8901 ENTRY(cpum.GstCtx.ah),
8902 ENTRY(cpum.GstCtx.rcx),
8903 ENTRY(cpum.GstCtx.ch),
8904 ENTRY(cpum.GstCtx.rdx),
8905 ENTRY(cpum.GstCtx.dh),
8906 ENTRY(cpum.GstCtx.rbx),
8907 ENTRY(cpum.GstCtx.bh),
8908 ENTRY(cpum.GstCtx.rsp),
8909 ENTRY(cpum.GstCtx.rbp),
8910 ENTRY(cpum.GstCtx.rsi),
8911 ENTRY(cpum.GstCtx.rdi),
8912 ENTRY(cpum.GstCtx.r8),
8913 ENTRY(cpum.GstCtx.r9),
8914 ENTRY(cpum.GstCtx.r10),
8915 ENTRY(cpum.GstCtx.r11),
8916 ENTRY(cpum.GstCtx.r12),
8917 ENTRY(cpum.GstCtx.r13),
8918 ENTRY(cpum.GstCtx.r14),
8919 ENTRY(cpum.GstCtx.r15),
8920 ENTRY(cpum.GstCtx.es.Sel),
8921 ENTRY(cpum.GstCtx.es.u64Base),
8922 ENTRY(cpum.GstCtx.es.u32Limit),
8923 ENTRY(cpum.GstCtx.es.Attr),
8924 ENTRY(cpum.GstCtx.cs.Sel),
8925 ENTRY(cpum.GstCtx.cs.u64Base),
8926 ENTRY(cpum.GstCtx.cs.u32Limit),
8927 ENTRY(cpum.GstCtx.cs.Attr),
8928 ENTRY(cpum.GstCtx.ss.Sel),
8929 ENTRY(cpum.GstCtx.ss.u64Base),
8930 ENTRY(cpum.GstCtx.ss.u32Limit),
8931 ENTRY(cpum.GstCtx.ss.Attr),
8932 ENTRY(cpum.GstCtx.ds.Sel),
8933 ENTRY(cpum.GstCtx.ds.u64Base),
8934 ENTRY(cpum.GstCtx.ds.u32Limit),
8935 ENTRY(cpum.GstCtx.ds.Attr),
8936 ENTRY(cpum.GstCtx.fs.Sel),
8937 ENTRY(cpum.GstCtx.fs.u64Base),
8938 ENTRY(cpum.GstCtx.fs.u32Limit),
8939 ENTRY(cpum.GstCtx.fs.Attr),
8940 ENTRY(cpum.GstCtx.gs.Sel),
8941 ENTRY(cpum.GstCtx.gs.u64Base),
8942 ENTRY(cpum.GstCtx.gs.u32Limit),
8943 ENTRY(cpum.GstCtx.gs.Attr),
8944 ENTRY(cpum.GstCtx.rip),
8945 ENTRY(cpum.GstCtx.eflags),
8946 ENTRY(cpum.GstCtx.uRipInhibitInt),
8947 ENTRY(cpum.GstCtx.cr0),
8948 ENTRY(cpum.GstCtx.cr4),
8949 ENTRY(cpum.GstCtx.aXcr[0]),
8950 ENTRY(cpum.GstCtx.aXcr[1]),
8951#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8952 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8953 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8954 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8955 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8956 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8957 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8958 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8959 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8960 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8961 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8962 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8963 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8964 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8965 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8966 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8967 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8968 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8969 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8970 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8971 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8972 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8973 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8974 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8975 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8976 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8977 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8978 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8979 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8980 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8981 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8982 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8983 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8984 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8985#endif
8986#undef ENTRY
8987 };
8988#ifdef VBOX_STRICT
8989 static bool s_fOrderChecked = false;
8990 if (!s_fOrderChecked)
8991 {
8992 s_fOrderChecked = true;
8993 uint32_t offPrev = s_aMembers[0].off;
8994 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8995 {
8996 Assert(s_aMembers[i].off > offPrev);
8997 offPrev = s_aMembers[i].off;
8998 }
8999 }
9000#endif
9001
9002 /*
9003 * Binary lookup.
9004 */
9005 unsigned iStart = 0;
9006 unsigned iEnd = RT_ELEMENTS(s_aMembers);
9007 for (;;)
9008 {
9009 unsigned const iCur = iStart + (iEnd - iStart) / 2;
9010 uint32_t const offCur = s_aMembers[iCur].off;
9011 if (off < offCur)
9012 {
9013 if (iCur != iStart)
9014 iEnd = iCur;
9015 else
9016 break;
9017 }
9018 else if (off > offCur)
9019 {
9020 if (iCur + 1 < iEnd)
9021 iStart = iCur + 1;
9022 else
9023 break;
9024 }
9025 else
9026 return s_aMembers[iCur].pszName;
9027 }
9028#ifdef VBOX_WITH_STATISTICS
9029 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9030 return "iem.s.acThreadedFuncStats[iFn]";
9031#endif
9032 return NULL;
9033}
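
/* Illustrative note (not part of the build): this helper is only used to annotate
   disassembly, mapping a byte offset into VMCPUCC onto the member name at exactly
   that offset, e.g. (a sketch):

       iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip))  -> "cpum.GstCtx.rip"
       iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, iem.s.fExec))      -> "iem.s.fExec"

   Offsets not listed in s_aMembers (apart from the acThreadedFuncStats range when
   VBOX_WITH_STATISTICS is defined) yield NULL and the callers simply print no
   annotation. */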
9034
9035
9036/**
9037 * Translates a label to a name.
9038 */
9039static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode /*= false*/)
9040{
9041 switch (enmLabel)
9042 {
9043#define STR_CASE_CMN(a_Label) case kIemNativeLabelType_ ## a_Label: return fCommonCode ? "Chunk_" #a_Label : #a_Label;
9044 STR_CASE_CMN(Invalid);
9045 STR_CASE_CMN(RaiseDe);
9046 STR_CASE_CMN(RaiseUd);
9047 STR_CASE_CMN(RaiseSseRelated);
9048 STR_CASE_CMN(RaiseAvxRelated);
9049 STR_CASE_CMN(RaiseSseAvxFpRelated);
9050 STR_CASE_CMN(RaiseNm);
9051 STR_CASE_CMN(RaiseGp0);
9052 STR_CASE_CMN(RaiseMf);
9053 STR_CASE_CMN(RaiseXf);
9054 STR_CASE_CMN(ObsoleteTb);
9055 STR_CASE_CMN(NeedCsLimChecking);
9056 STR_CASE_CMN(CheckBranchMiss);
9057 STR_CASE_CMN(Return);
9058 STR_CASE_CMN(ReturnBreak);
9059 STR_CASE_CMN(ReturnBreakFF);
9060 STR_CASE_CMN(ReturnWithFlags);
9061 STR_CASE_CMN(ReturnBreakViaLookup);
9062 STR_CASE_CMN(ReturnBreakViaLookupWithIrq);
9063 STR_CASE_CMN(ReturnBreakViaLookupWithTlb);
9064 STR_CASE_CMN(ReturnBreakViaLookupWithTlbAndIrq);
9065 STR_CASE_CMN(NonZeroRetOrPassUp);
9066#undef STR_CASE_CMN
9067#define STR_CASE_LBL(a_Label) case kIemNativeLabelType_ ## a_Label: return #a_Label;
9068 STR_CASE_LBL(LoopJumpTarget);
9069 STR_CASE_LBL(If);
9070 STR_CASE_LBL(Else);
9071 STR_CASE_LBL(Endif);
9072 STR_CASE_LBL(CheckIrq);
9073 STR_CASE_LBL(TlbLookup);
9074 STR_CASE_LBL(TlbMiss);
9075 STR_CASE_LBL(TlbDone);
9076 case kIemNativeLabelType_End: break;
9077 }
9078 return NULL;
9079}
9080
9081
9082/** Info for the symbol resolver used when disassembling. */
9083typedef struct IEMNATIVDISASMSYMCTX
9084{
9085 PVMCPU pVCpu;
9086 PCIEMTB pTb;
9087# ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9088 PCIEMNATIVEPERCHUNKCTX pCtx;
9089# endif
9090# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9091 PCIEMTBDBG pDbgInfo;
9092# endif
9093} IEMNATIVDISASMSYMCTX;
9094typedef IEMNATIVDISASMSYMCTX *PIEMNATIVDISASMSYMCTX;
9095
9096
9097/**
9098 * Resolve address to symbol, if we can.
9099 */
9100static const char *iemNativeDisasmGetSymbol(PIEMNATIVDISASMSYMCTX pSymCtx, uintptr_t uAddress, char *pszBuf, size_t cbBuf)
9101{
9102#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE)
9103 PCIEMTB const pTb = pSymCtx->pTb;
9104 uintptr_t const offNative = (uAddress - (uintptr_t)pTb->Native.paInstructions) / sizeof(IEMNATIVEINSTR);
9105 if (offNative <= pTb->Native.cInstructions)
9106 {
9107# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9108 /*
9109 * Scan debug info for a matching label.
9110 * Since the debug info should be 100% linear, we can do a binary search here.
9111 */
9112 PCIEMTBDBG const pDbgInfo = pSymCtx->pDbgInfo;
9113 if (pDbgInfo)
9114 {
9115 uint32_t const cEntries = pDbgInfo->cEntries;
9116 uint32_t idxEnd = cEntries;
9117 uint32_t idxStart = 0;
9118 for (;;)
9119 {
9120 /* Find a NativeOffset record close to the midpoint. */
9121 uint32_t idx = idxStart + (idxEnd - idxStart) / 2;
9122 while (idx > idxStart && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9123 idx--;
9124 if (pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9125 {
9126 idx = idxStart + (idxEnd - idxStart) / 2 + 1;
9127 while (idx < idxEnd && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9128 idx++;
9129 if (idx >= idxEnd)
9130 break;
9131 }
9132
9133 /* Do the binary searching thing. */
9134 if (offNative < pDbgInfo->aEntries[idx].NativeOffset.offNative)
9135 {
9136 if (idx > idxStart)
9137 idxEnd = idx;
9138 else
9139 break;
9140 }
9141 else if (offNative > pDbgInfo->aEntries[idx].NativeOffset.offNative)
9142 {
9143 idx += 1;
9144 if (idx < idxEnd)
9145 idxStart = idx;
9146 else
9147 break;
9148 }
9149 else
9150 {
9151 /* Got a matching offset, scan forward till we hit a label, but
9152 stop when the native offset changes. */
9153 while (++idx < cEntries)
9154 switch (pDbgInfo->aEntries[idx].Gen.uType)
9155 {
9156 case kIemTbDbgEntryType_Label:
9157 {
9158 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)pDbgInfo->aEntries[idx].Label.enmLabel;
9159 const char * const pszName = iemNativeGetLabelName(enmLabel);
9160 if (enmLabel < kIemNativeLabelType_FirstWithMultipleInstances)
9161 return pszName;
9162 RTStrPrintf(pszBuf, cbBuf, "%s_%u", pszName, pDbgInfo->aEntries[idx].Label.uData);
9163 return pszBuf;
9164 }
9165
9166 case kIemTbDbgEntryType_NativeOffset:
9167 if (pDbgInfo->aEntries[idx].NativeOffset.offNative != offNative)
9168 return NULL;
9169 break;
9170 }
9171 break;
9172 }
9173 }
9174 }
9175# endif
9176 }
9177# ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9178 else
9179 {
9180 PCIEMNATIVEPERCHUNKCTX const pChunkCtx = pSymCtx->pCtx;
9181 if (pChunkCtx)
9182 for (uint32_t i = 1; i < RT_ELEMENTS(pChunkCtx->apExitLabels); i++)
9183 if ((PIEMNATIVEINSTR)uAddress == pChunkCtx->apExitLabels[i])
9184 return iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true /*fCommonCode*/);
9185 }
9186# endif
9187#endif
9188 RT_NOREF(pSymCtx, uAddress, pszBuf, cbBuf);
9189 return NULL;
9190}
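
/* Illustrative note (not part of the build): depending on what the debug info (or
   the per-chunk exit label table) records for the given address, the resolver above
   produces names of the following shapes (a sketch):

       "ReturnBreak"        - single instance label, returned as-is
       "TlbLookup_3"        - multi instance label, formatted into pszBuf as "%s_%u"
                              using the label's uData
       "Chunk_ReturnBreak"  - common tail code exit label in the
                              IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE case

   and NULL when nothing matches, in which case no symbol is printed. */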
9191
9192#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9193
9194/**
9195 * @callback_method_impl{FNDISGETSYMBOL}
9196 */
9197static DECLCALLBACK(int) iemNativeDisasmGetSymbolCb(PCDISSTATE pDis, uint32_t u32Sel, RTUINTPTR uAddress,
9198 char *pszBuf, size_t cchBuf, RTINTPTR *poff, void *pvUser)
9199{
9200 const char * const pszSym = iemNativeDisasmGetSymbol((PIEMNATIVDISASMSYMCTX)pvUser, uAddress, pszBuf, cchBuf);
9201 if (pszSym)
9202 {
9203 *poff = 0;
9204 if (pszSym != pszBuf)
9205 return RTStrCopy(pszBuf, cchBuf, pszSym);
9206 return VINF_SUCCESS;
9207 }
9208 RT_NOREF(pDis, u32Sel);
9209 return VERR_SYMBOL_NOT_FOUND;
9210}
9211
9212#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9213
9214/**
9215 * Annotates an instruction decoded by the capstone disassembler.
9216 */
9217static const char *
9218iemNativeDisasmAnnotateCapstone(PIEMNATIVDISASMSYMCTX pSymCtx, cs_insn const *pInstr, char *pszBuf, size_t cchBuf)
9219{
9220# if defined(RT_ARCH_ARM64)
9221 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9222 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9223 {
9224        /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9225 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9226 char const *psz = strchr(pInstr->op_str, '[');
9227 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9228 {
9229            uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9230 int32_t off = -1;
9231 psz += 4;
9232 if (*psz == ']')
9233 off = 0;
9234 else if (*psz == ',')
9235 {
9236 psz = RTStrStripL(psz + 1);
9237 if (*psz == '#')
9238 off = RTStrToInt32(&psz[1]);
9239 /** @todo deal with index registers and LSL as well... */
9240 }
9241 if (off >= 0)
9242 return iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9243 }
9244 }
9245 else if (pInstr->id == ARM64_INS_B || pInstr->id == ARM64_INS_BL)
9246 {
9247 const char *pszAddr = strchr(pInstr->op_str, '#');
9248 if (pszAddr)
9249 {
9250 uint64_t uAddr = RTStrToUInt64(pszAddr + 1);
9251 if (uAddr != 0)
9252 return iemNativeDisasmGetSymbol(pSymCtx, uAddr, pszBuf, cchBuf);
9253 }
9254 }
9255# endif
9256 RT_NOREF(pSymCtx, pInstr, pszBuf, cchBuf);
9257 return NULL;
9258}
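
/* Illustrative note (not part of the build): since capstone only gives us textual
   operands here, the annotation is recovered by parsing op_str.  Roughly (a sketch;
   the offsets below are made up for illustration):

       "w0, [x28, #0x748]"  -> x28 is IEMNATIVE_REG_FIXED_PVMCPU, so the name comes
                               from iemNativeDbgVCpuOffsetToName(0x748)
       "x1, [x27, #0x18]"   -> x27 is IEMNATIVE_REG_FIXED_PCPUMCTX, so the offset is
                               rebased first: RT_UOFFSETOF(VMCPU, cpum.GstCtx) + 0x18
       "b #0x103f00020"     -> branch target resolved via iemNativeDisasmGetSymbol()

   Index registers and shifted offsets are not handled yet (see the @todo above). */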
9259#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9260
9261
9262DECLHIDDEN(void) iemNativeDisassembleTb(PVMCPU pVCpu, PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9263{
9264 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9265#if defined(RT_ARCH_AMD64)
9266 static const char * const a_apszMarkers[] =
9267 {
9268 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9269 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9270 };
9271#endif
9272
9273 char szDisBuf[512];
9274 DISSTATE Dis;
9275 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9276 uint32_t const cNative = pTb->Native.cInstructions;
9277 uint32_t offNative = 0;
9278#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9279 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9280#endif
9281 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9282 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9283 : DISCPUMODE_64BIT;
9284#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9285# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9286 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb), pDbgInfo };
9287# else
9288 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb) };
9289# endif
9290#elif defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
9291 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, pDbgInfo };
9292#else
9293 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb };
9294#endif
9295#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9296 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9297#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9298 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9299#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9300# error "Port me"
9301#else
9302 csh hDisasm = ~(size_t)0;
9303# if defined(RT_ARCH_AMD64)
9304 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9305# elif defined(RT_ARCH_ARM64)
9306 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9307# else
9308# error "Port me"
9309# endif
9310 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9311
9312 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9313 //Assert(rcCs == CS_ERR_OK);
9314#endif
9315
9316 /*
9317 * Print TB info.
9318 */
9319 pHlp->pfnPrintf(pHlp,
9320 "pTb=%p: GCPhysPc=%RGp (%%%RGv) cInstructions=%u LB %#x cRanges=%u\n"
9321 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9322 pTb, pTb->GCPhysPc,
9323#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9324 pTb->pDbgInfo ? pTb->pDbgInfo->FlatPc : RTGCPTR_MAX,
9325#else
9326 pTb->FlatPc,
9327#endif
9328 pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9329 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9330#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9331 if (pDbgInfo && pDbgInfo->cEntries > 1)
9332 {
9333 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9334
9335 /*
9336 * This disassembly is driven by the debug info which follows the native
9337 * code and indicates when it starts with the next guest instructions,
9338 * where labels are and such things.
9339 */
9340 uint32_t idxThreadedCall = 0;
9341 uint32_t idxGuestInstr = 0;
9342 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9343 uint8_t idxRange = UINT8_MAX;
9344 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9345 uint32_t offRange = 0;
9346 uint32_t offOpcodes = 0;
9347 uint32_t const cbOpcodes = pTb->cbOpcodes;
9348 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9349 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9350 uint32_t iDbgEntry = 1;
9351 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9352
9353 while (offNative < cNative)
9354 {
9355 /* If we're at or have passed the point where the next chunk of debug
9356 info starts, process it. */
9357 if (offDbgNativeNext <= offNative)
9358 {
9359 offDbgNativeNext = UINT32_MAX;
9360 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9361 {
9362 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9363 {
9364 case kIemTbDbgEntryType_GuestInstruction:
9365 {
9366 /* Did the exec flag change? */
9367 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9368 {
9369 pHlp->pfnPrintf(pHlp,
9370 " fExec change %#08x -> %#08x %s\n",
9371 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9372 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9373 szDisBuf, sizeof(szDisBuf)));
9374 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9375 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9376 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9377 : DISCPUMODE_64BIT;
9378 }
9379
9380                            /* New opcode range? We need to handle a spurious debug info entry here for cases
9381 where the compilation was aborted before the opcode was recorded and the actual
9382 instruction was translated to a threaded call. This may happen when we run out
9383 of ranges, or when some complicated interrupts/FFs are found to be pending or
9384 similar. So, we just deal with it here rather than in the compiler code as it
9385 is a lot simpler to do here. */
9386 if ( idxRange == UINT8_MAX
9387 || idxRange >= cRanges
9388 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9389 {
9390 idxRange += 1;
9391 if (idxRange < cRanges)
9392 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9393 else
9394 continue;
9395 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9396 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9397 + (pTb->aRanges[idxRange].idxPhysPage == 0
9398 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9399 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9400 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9401 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9402 pTb->aRanges[idxRange].idxPhysPage);
9403 GCPhysPc += offRange;
9404 }
9405
9406 /* Disassemble the instruction. */
9407 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9408 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9409 uint32_t cbInstr = 1;
9410 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9411 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9412 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9413 if (RT_SUCCESS(rc))
9414 {
9415 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9416 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9417 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9418 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9419
9420 static unsigned const s_offMarker = 55;
9421 static char const s_szMarker[] = " ; <--- guest";
9422 if (cch < s_offMarker)
9423 {
9424 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9425 cch = s_offMarker;
9426 }
9427 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9428 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9429
9430 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %s #%u\n", GCPhysPc, szDisBuf, idxGuestInstr);
9431 }
9432 else
9433 {
9434 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9435 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9436 cbInstr = 1;
9437 }
9438 idxGuestInstr++;
9439 GCPhysPc += cbInstr;
9440 offOpcodes += cbInstr;
9441 offRange += cbInstr;
9442 continue;
9443 }
9444
9445 case kIemTbDbgEntryType_ThreadedCall:
9446 pHlp->pfnPrintf(pHlp,
9447 " Call #%u to %s (%u args) - %s\n",
9448 idxThreadedCall,
9449 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9450 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9451 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9452 idxThreadedCall++;
9453 continue;
9454
9455 case kIemTbDbgEntryType_GuestRegShadowing:
9456 {
9457 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9458 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9459 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9460 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9461 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9462 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9463 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9464 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9465 else
9466 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9467 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9468 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9469 continue;
9470 }
9471
9472#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9473 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9474 {
9475 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9476 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9477 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9478 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9479 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9480 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9481 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9482 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9483 else
9484 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9485 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9486 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9487 continue;
9488 }
9489#endif
9490
9491 case kIemTbDbgEntryType_Label:
9492 {
9493 const char *pszName = iemNativeGetLabelName((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel);
9494 if (pDbgInfo->aEntries[iDbgEntry].Label.enmLabel >= kIemNativeLabelType_FirstWithMultipleInstances)
9495 {
9496 const char *pszComment = pDbgInfo->aEntries[iDbgEntry].Label.enmLabel == kIemNativeLabelType_Else
9497 ? " ; regs state restored pre-if-block" : "";
9498 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9499 }
9500 else
9501 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9502 continue;
9503 }
9504
9505 case kIemTbDbgEntryType_NativeOffset:
9506 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9507 Assert(offDbgNativeNext >= offNative);
9508 break;
9509
9510#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9511 case kIemTbDbgEntryType_DelayedPcUpdate:
9512 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9513 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9514 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9515 continue;
9516#endif
9517
9518#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9519 case kIemTbDbgEntryType_GuestRegDirty:
9520 {
9521 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9522 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9523 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9524 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9525 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9526 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9527 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9528 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9529 pszGstReg, pszHstReg);
9530 continue;
9531 }
9532
9533 case kIemTbDbgEntryType_GuestRegWriteback:
9534                            pHlp->pfnPrintf(pHlp, "    Writing dirty %s registers (gst %#RX64)\n",
9535 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9536 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9537 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9538 continue;
9539#endif
9540
9541 default:
9542 AssertFailed();
9543 }
9544 iDbgEntry++;
9545 break;
9546 }
9547 }
9548
9549 /*
9550 * Disassemble the next native instruction.
9551 */
9552 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9553# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9554 uint32_t cbInstr = sizeof(paNative[0]);
9555 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9556 if (RT_SUCCESS(rc))
9557 {
9558# if defined(RT_ARCH_AMD64)
9559 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9560 {
9561 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9562 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9563 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9564 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9565 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9566 uInfo & 0x8000 ? "recompiled" : "todo");
9567 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9568 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9569 else
9570 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9571 }
9572 else
9573# endif
9574 {
9575 const char *pszAnnotation = NULL;
9576# ifdef RT_ARCH_AMD64
9577 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9578 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9579 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9580 iemNativeDisasmGetSymbolCb, &SymCtx);
9581 PCDISOPPARAM pMemOp;
9582 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[0].fUse))
9583 pMemOp = &Dis.aParams[0];
9584 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[1].fUse))
9585 pMemOp = &Dis.aParams[1];
9586 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[2].fUse))
9587 pMemOp = &Dis.aParams[2];
9588 else
9589 pMemOp = NULL;
9590 if ( pMemOp
9591 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9592 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9593 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9594 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9595
9596# elif defined(RT_ARCH_ARM64)
9597 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9598 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9599 iemNativeDisasmGetSymbolCb, &SymCtx);
9600# else
9601# error "Port me"
9602# endif
9603 if (pszAnnotation)
9604 {
9605 static unsigned const s_offAnnotation = 55;
9606 size_t const cchAnnotation = strlen(pszAnnotation);
9607 size_t cchDis = strlen(szDisBuf);
9608 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9609 {
9610 if (cchDis < s_offAnnotation)
9611 {
9612 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9613 cchDis = s_offAnnotation;
9614 }
9615 szDisBuf[cchDis++] = ' ';
9616 szDisBuf[cchDis++] = ';';
9617 szDisBuf[cchDis++] = ' ';
9618 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9619 }
9620 }
9621 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9622 }
9623 }
9624 else
9625 {
9626# if defined(RT_ARCH_AMD64)
9627 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9628 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9629# elif defined(RT_ARCH_ARM64)
9630 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9631# else
9632# error "Port me"
9633# endif
9634 cbInstr = sizeof(paNative[0]);
9635 }
9636 offNative += cbInstr / sizeof(paNative[0]);
9637
9638# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9639 cs_insn *pInstr;
9640 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9641 (uintptr_t)pNativeCur, 1, &pInstr);
9642 if (cInstrs > 0)
9643 {
9644 Assert(cInstrs == 1);
9645 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9646 size_t const cchOp = strlen(pInstr->op_str);
9647# if defined(RT_ARCH_AMD64)
9648 if (pszAnnotation)
9649 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9650 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9651 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9652 else
9653 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9654 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9655
9656# else
9657 if (pszAnnotation)
9658 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9659 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9660 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9661 else
9662 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9663 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9664# endif
9665 offNative += pInstr->size / sizeof(*pNativeCur);
9666 cs_free(pInstr, cInstrs);
9667 }
9668 else
9669 {
9670# if defined(RT_ARCH_AMD64)
9671 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9672                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9673# else
9674 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9675# endif
9676 offNative++;
9677 }
9678# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9679 }
9680 }
9681 else
9682#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9683 {
9684 /*
9685 * No debug info, just disassemble the x86 code and then the native code.
9686 *
9687 * First the guest code:
9688 */
9689 for (unsigned i = 0; i < pTb->cRanges; i++)
9690 {
9691 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9692 + (pTb->aRanges[i].idxPhysPage == 0
9693 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9694 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9695 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9696 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9697 unsigned off = pTb->aRanges[i].offOpcodes;
9698 /** @todo this ain't working when crossing pages! */
9699 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9700 while (off < cbOpcodes)
9701 {
9702 uint32_t cbInstr = 1;
9703 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9704 &pTb->pabOpcodes[off], cbOpcodes - off,
9705 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9706 if (RT_SUCCESS(rc))
9707 {
9708 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9709 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9710 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9711 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9712 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9713 GCPhysPc += cbInstr;
9714 off += cbInstr;
9715 }
9716 else
9717 {
9718 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - disassembly failure %Rrc\n",
9719 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9720 break;
9721 }
9722 }
9723 }
9724
9725 /*
9726 * Then the native code:
9727 */
9728 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9729 while (offNative < cNative)
9730 {
9731 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9732# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9733 uint32_t cbInstr = sizeof(paNative[0]);
9734 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9735 if (RT_SUCCESS(rc))
9736 {
9737# if defined(RT_ARCH_AMD64)
9738 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9739 {
9740 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9741 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9742 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9743 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9744 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9745 uInfo & 0x8000 ? "recompiled" : "todo");
9746 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9747 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9748 else
9749 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9750 }
9751 else
9752# endif
9753 {
9754# ifdef RT_ARCH_AMD64
9755 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9756 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9757 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9758 iemNativeDisasmGetSymbolCb, &SymCtx);
9759# elif defined(RT_ARCH_ARM64)
9760 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9761 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9762 iemNativeDisasmGetSymbolCb, &SymCtx);
9763# else
9764# error "Port me"
9765# endif
9766 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9767 }
9768 }
9769 else
9770 {
9771# if defined(RT_ARCH_AMD64)
9772 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9773 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9774# else
9775 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9776# endif
9777 cbInstr = sizeof(paNative[0]);
9778 }
9779 offNative += cbInstr / sizeof(paNative[0]);
9780
9781# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9782 cs_insn *pInstr;
9783 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9784 (uintptr_t)pNativeCur, 1, &pInstr);
9785 if (cInstrs > 0)
9786 {
9787 Assert(cInstrs == 1);
9788 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9789 size_t const cchOp = strlen(pInstr->op_str);
9790# if defined(RT_ARCH_AMD64)
9791 if (pszAnnotation)
9792 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9793 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9794 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9795 else
9796 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9797 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9798
9799# else
9800 if (pszAnnotation)
9801 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9802 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9803 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9804 else
9805 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9806 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9807# endif
9808 offNative += pInstr->size / sizeof(*pNativeCur);
9809 cs_free(pInstr, cInstrs);
9810 }
9811 else
9812 {
9813# if defined(RT_ARCH_AMD64)
9814 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9815                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9816# else
9817 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9818# endif
9819 offNative++;
9820 }
9821# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9822 }
9823 }
9824
9825#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9826 /* Cleanup. */
9827 cs_close(&hDisasm);
9828#endif
9829}
9830
9831
9832#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9833
9834/** Emit alignment padding between labels / functions. */
9835DECL_INLINE_THROW(uint32_t)
9836iemNativeRecompileEmitAlignmentPadding(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fAlignMask)
9837{
9838 if (off & fAlignMask)
9839 {
9840 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, fAlignMask + 1);
9841 while (off & fAlignMask)
9842# if defined(RT_ARCH_AMD64)
9843 pCodeBuf[off++] = 0xcc;
9844# elif defined(RT_ARCH_ARM64)
9845 pCodeBuf[off++] = Armv8A64MkInstrBrk(0xcccc);
9846# else
9847# error "port me"
9848# endif
9849 }
9850 return off;
9851}
9852
9853
9854/**
9855 * Called when a new chunk is allocated to emit common per-chunk code.
9856 *
9857 * Allocates a per-chunk context directly from the chunk itself and places the
9858 * common code there.
9859 *
9860 * @returns Pointer to the chunk context start.
9861 * @param pVCpu The cross context virtual CPU structure of the calling
9862 * thread.
9863 * @param idxChunk The index of the chunk being added and requiring a
9864 * common code context.
9865 */
9866DECLHIDDEN(PCIEMNATIVEPERCHUNKCTX) iemNativeRecompileAttachExecMemChunkCtx(PVMCPU pVCpu, uint32_t idxChunk)
9867{
9868 /*
9869 * Allocate a new recompiler state (since we're likely to be called while
9870 * the default one is fully loaded already with a recompiled TB).
9871 *
9872 * This is a bit of overkill, but this isn't a frequently used code path.
9873 */
9874 PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, NULL);
9875 AssertReturn(pReNative, NULL);
9876
9877# if defined(RT_ARCH_AMD64)
9878     uint32_t const fAlignMask = 15;     /* 16 byte alignment; instruction units are bytes on AMD64. */
9879 # elif defined(RT_ARCH_ARM64)
9880     uint32_t const fAlignMask = 31 / 4; /* 32 byte alignment; instruction units are 32-bit words on ARM64. */
9881# else
9882# error "port me"
9883# endif
9884 uint32_t aoffLabels[kIemNativeLabelType_LastTbExit + 1] = {0};
9885 int rc = VINF_SUCCESS;
9886 uint32_t off = 0;
9887
9888 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9889 {
9890 /*
9891 * Emit the epilog code.
9892 */
9893 aoffLabels[kIemNativeLabelType_Return] = off;
9894 off = iemNativeEmitCoreEpilog(pReNative, off);
9895
9896 /*
9897      * Generate special jump labels.  All of these get a copy of the epilog code.
9898 */
9899 static struct
9900 {
9901 IEMNATIVELABELTYPE enmExitReason;
9902 uint32_t (*pfnEmitCore)(PIEMRECOMPILERSTATE pReNative, uint32_t off);
9903 } const s_aSpecialWithEpilogs[] =
9904 {
9905 { kIemNativeLabelType_NonZeroRetOrPassUp, iemNativeEmitCoreRcFiddling },
9906 { kIemNativeLabelType_ReturnBreak, iemNativeEmitCoreReturnBreak },
9907 { kIemNativeLabelType_ReturnBreakFF, iemNativeEmitCoreReturnBreakFF },
9908 { kIemNativeLabelType_ReturnWithFlags, iemNativeEmitCoreReturnWithFlags },
9909 };
9910 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSpecialWithEpilogs); i++)
9911 {
9912 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9913 Assert(aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] == 0);
9914 aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] = off;
9915 off = s_aSpecialWithEpilogs[i].pfnEmitCore(pReNative, off);
9916 off = iemNativeEmitCoreEpilog(pReNative, off);
9917 }
9918
9919 /*
9920 * Do what iemNativeEmitReturnBreakViaLookup does.
9921 */
9922 static struct
9923 {
9924 IEMNATIVELABELTYPE enmExitReason;
9925 uintptr_t pfnHelper;
9926 } const s_aViaLookup[] =
9927 {
9928 { kIemNativeLabelType_ReturnBreakViaLookup,
9929 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/> },
9930 { kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
9931 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/> },
9932 { kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
9933 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/> },
9934 { kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
9935 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/> },
9936 };
9937 uint32_t const offReturnBreak = aoffLabels[kIemNativeLabelType_ReturnBreak]; Assert(offReturnBreak != 0);
9938 for (uint32_t i = 0; i < RT_ELEMENTS(s_aViaLookup); i++)
9939 {
9940 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9941 Assert(aoffLabels[s_aViaLookup[i].enmExitReason] == 0);
9942 aoffLabels[s_aViaLookup[i].enmExitReason] = off;
9943 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, s_aViaLookup[i].pfnHelper);
9944 }
9945
9946 /*
9947      * Generate simple TB tail labels that just call a helper with a pVCpu
9948      * arg and either return or longjmp/throw a non-zero status.
9949 */
9950 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9951 static struct
9952 {
9953 IEMNATIVELABELTYPE enmExitReason;
9954 bool fWithEpilog;
9955 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9956 } const s_aSimpleTailLabels[] =
9957 {
9958 { kIemNativeLabelType_RaiseDe, false, iemNativeHlpExecRaiseDe },
9959 { kIemNativeLabelType_RaiseUd, false, iemNativeHlpExecRaiseUd },
9960 { kIemNativeLabelType_RaiseSseRelated, false, iemNativeHlpExecRaiseSseRelated },
9961 { kIemNativeLabelType_RaiseAvxRelated, false, iemNativeHlpExecRaiseAvxRelated },
9962 { kIemNativeLabelType_RaiseSseAvxFpRelated, false, iemNativeHlpExecRaiseSseAvxFpRelated },
9963 { kIemNativeLabelType_RaiseNm, false, iemNativeHlpExecRaiseNm },
9964 { kIemNativeLabelType_RaiseGp0, false, iemNativeHlpExecRaiseGp0 },
9965 { kIemNativeLabelType_RaiseMf, false, iemNativeHlpExecRaiseMf },
9966 { kIemNativeLabelType_RaiseXf, false, iemNativeHlpExecRaiseXf },
9967 { kIemNativeLabelType_ObsoleteTb, true, iemNativeHlpObsoleteTb },
9968 { kIemNativeLabelType_NeedCsLimChecking, true, iemNativeHlpNeedCsLimChecking },
9969 { kIemNativeLabelType_CheckBranchMiss, true, iemNativeHlpCheckBranchMiss },
9970 };
9971 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSimpleTailLabels); i++)
9972 {
9973 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9974 Assert(!aoffLabels[s_aSimpleTailLabels[i].enmExitReason]);
9975 aoffLabels[s_aSimpleTailLabels[i].enmExitReason] = off;
9976
9977 /* int pfnCallback(PVMCPUCC pVCpu) */
9978 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9979 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)s_aSimpleTailLabels[i].pfnCallback);
9980
9981 /* jump back to the return sequence / generate a return sequence. */
9982 if (!s_aSimpleTailLabels[i].fWithEpilog)
9983 off = iemNativeEmitJmpToFixed(pReNative, off, aoffLabels[kIemNativeLabelType_Return]);
9984 else
9985 off = iemNativeEmitCoreEpilog(pReNative, off);
9986 }
9987
9988
9989# ifdef VBOX_STRICT
9990     /* Make sure we've generated code for all labels. */
9991 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(aoffLabels); i++)
9992 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_Return);
9993 # endif
9994 }
9995 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9996 {
9997 Log(("iemNativeRecompileAttachExecMemChunkCtx: Caught %Rrc while recompiling!\n", rc));
9998 iemNativeTerm(pReNative);
9999 return NULL;
10000 }
10001 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10002
10003 /*
10004 * Allocate memory for the context (first) and the common code (last).
10005 */
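          /* Illustrative layout of what ends up at the start of the chunk
           * (derived from the code below; sizes are build dependent):
           *
           *      +0      IEMNATIVEPERCHUNKCTX, padded up to a 64 byte multiple (cbCtx)
           *      +cbCtx  the common tail/epilog code generated above (cbCode)
           *
           * paFinalCommonCodeRx is advanced past the context so the apExitLabels
           * entries end up pointing into the RX mapping of the code part. */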
10006 PIEMNATIVEPERCHUNKCTX pCtx;
10007 uint32_t const cbCtx = RT_ALIGN_32(sizeof(*pCtx), 64);
10008 uint32_t const cbCode = off * sizeof(IEMNATIVEINSTR);
10009 PIEMNATIVEINSTR paFinalCommonCodeRx = NULL;
10010 pCtx = (PIEMNATIVEPERCHUNKCTX)iemExecMemAllocatorAllocFromChunk(pVCpu, idxChunk, cbCtx + cbCode, &paFinalCommonCodeRx);
10011 AssertLogRelMsgReturn(pCtx, ("cbCtx=%#x cbCode=%#x idxChunk=%#x\n", cbCtx, cbCode, idxChunk), NULL);
10012
10013 /*
10014 * Copy over the generated code.
10015 * There should be no fixups or labels defined here.
10016 */
10017 paFinalCommonCodeRx = (PIEMNATIVEINSTR)((uintptr_t)paFinalCommonCodeRx + cbCtx);
10018 memcpy((PIEMNATIVEINSTR)((uintptr_t)pCtx + cbCtx), pReNative->pInstrBuf, cbCode);
10019
10020 Assert(pReNative->cFixups == 0);
10021 Assert(pReNative->cLabels == 0);
10022
10023 /*
10024 * Initialize the context.
10025 */
10026 AssertCompile(kIemNativeLabelType_Invalid == 0);
10027 AssertCompile(RT_ELEMENTS(pCtx->apExitLabels) == RT_ELEMENTS(aoffLabels));
10028 pCtx->apExitLabels[kIemNativeLabelType_Invalid] = 0;
10029 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(pCtx->apExitLabels); i++)
10030 {
10031 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_Return);
10032 pCtx->apExitLabels[i] = &paFinalCommonCodeRx[aoffLabels[i]];
10033 Log10((" apExitLabels[%u]=%p %s\n", i, pCtx->apExitLabels[i], iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true)));
10034 }
10035
10036 iemExecMemAllocatorReadyForUse(pVCpu, pCtx, cbCtx + cbCode);
10037
10038 iemNativeTerm(pReNative);
10039 return pCtx;
10040}
10041
10042#endif /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
10043
10044/**
10045 * Recompiles the given threaded TB into a native one.
10046 *
10047 * In case of failure the translation block will be returned as-is.
10048 *
10049 * @returns pTb.
10050 * @param pVCpu The cross context virtual CPU structure of the calling
10051 * thread.
10052  * @param   pTb         The threaded translation block to recompile to native.
10053 */
10054DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
10055{
10056#if 0 /* For profiling the native recompiler code. */
10057l_profile_again:
10058#endif
10059 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
10060
10061 /*
10062 * The first time thru, we allocate the recompiler state and save it,
10063 * all the other times we'll just reuse the saved one after a quick reset.
10064 */
10065 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
10066 if (RT_LIKELY(pReNative))
10067 iemNativeReInit(pReNative, pTb);
10068 else
10069 {
10070 pReNative = iemNativeInit(pVCpu, pTb);
10071 AssertReturn(pReNative, pTb);
10072 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative; /* save it */
10073 }
10074
10075#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10076 /*
10077 * First do liveness analysis. This is done backwards.
10078 */
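          /* Rough sketch of the backward pass coded below (pseudo-code only; the
           * real entries are packed bitfield structures, not plain values):
           *
           *      state[cCalls - 1] = ALL-UNUSED;
           *      for (i = cCalls - 1; i > 0; i--)
           *          state[i - 1] = liveness(call[i], incoming = state[i]);
           *
           * Thus entry N summarizes how calls N+1..end use each guest register,
           * which later lets the register allocator skip work for values that are
           * never read again. */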
10079 {
10080 uint32_t idxCall = pTb->Thrd.cCalls;
10081 if (idxCall <= pReNative->cLivenessEntriesAlloc)
10082 { /* likely */ }
10083 else
10084 {
10085 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
10086 while (idxCall > cAlloc)
10087 cAlloc *= 2;
10088 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
10089 AssertReturn(pvNew, pTb);
10090 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
10091 pReNative->cLivenessEntriesAlloc = cAlloc;
10092 }
10093 AssertReturn(idxCall > 0, pTb);
10094 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
10095
10096 /* The initial (final) entry. */
10097 idxCall--;
10098 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
10099
10100 /* Loop backwards thru the calls and fill in the other entries. */
10101 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
10102 while (idxCall > 0)
10103 {
10104 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
10105 if (pfnLiveness)
10106 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
10107 else
10108 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
10109 pCallEntry--;
10110 idxCall--;
10111 }
10112
10113# ifdef VBOX_WITH_STATISTICS
10114     /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
10115        to 'clobbered' rather than 'input'. */
10116 /** @todo */
10117# endif
10118 }
10119#endif
10120
10121 /*
10122 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10123 * for aborting if an error happens.
10124 */
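          /* Simplified sketch of the control flow set up by the macros below;
           * depending on the build they expand to C++ try/catch or setjmp/longjmp:
           *
           *      rc = setjmp(...);
           *      if (rc == 0) { convert calls, emit tail code }      // TRY body
           *      else         { log rc and return pTb unchanged }    // CATCH body
           */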
10125 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10126#ifdef LOG_ENABLED
10127 uint32_t const cCallsOrg = cCallsLeft;
10128#endif
10129 uint32_t off = 0;
10130 int rc = VINF_SUCCESS;
10131 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10132 {
10133#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
10134 /*
10135 * Emit prolog code (fixed).
10136 */
10137 off = iemNativeEmitProlog(pReNative, off);
10138#endif
10139
10140 /*
10141 * Convert the calls to native code.
10142 */
10143#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10144 int32_t iGstInstr = -1;
10145#endif
10146#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10147 uint32_t cThreadedCalls = 0;
10148 uint32_t cRecompiledCalls = 0;
10149#endif
10150#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10151 uint32_t idxCurCall = 0;
10152#endif
10153 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10154 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10155 while (cCallsLeft-- > 0)
10156 {
10157 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10158#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10159 pReNative->idxCurCall = idxCurCall;
10160#endif
10161
10162#ifdef IEM_WITH_INTRA_TB_JUMPS
10163 /*
10164 * Define label for jump targets (currently only the first entry).
10165 */
10166 if (!(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET))
10167 { /* likely */ }
10168 else
10169 {
10170 iemNativeLabelCreate(pReNative, kIemNativeLabelType_LoopJumpTarget, off);
10171 Assert(idxCurCall == 0); /** @todo when jumping elsewhere, we have to save the register state. */
10172 }
10173#endif
10174
10175 /*
10176 * Debug info, assembly markup and statistics.
10177 */
10178#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10179 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10180 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10181#endif
10182#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10183 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10184 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10185 {
10186 if (iGstInstr < (int32_t)pTb->cInstructions)
10187 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10188 else
10189 Assert(iGstInstr == pTb->cInstructions);
10190 iGstInstr = pCallEntry->idxInstr;
10191 }
10192 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10193#endif
10194#if defined(VBOX_STRICT)
10195 off = iemNativeEmitMarker(pReNative, off,
10196 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
10197#endif
10198#if defined(VBOX_STRICT)
10199 iemNativeRegAssertSanity(pReNative);
10200#endif
10201#ifdef VBOX_WITH_STATISTICS
10202 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10203#endif
10204
10205#if 0
10206 if ( pTb->GCPhysPc == 0x00000000000c1240
10207 && idxCurCall == 67)
10208 off = iemNativeEmitBrk(pReNative, off, 0xf000);
10209#endif
10210
10211 /*
10212 * Actual work.
10213 */
10214 Log2(("%u[%u]: %s%s (off=%#x)\n", idxCurCall, pCallEntry->idxInstr,
10215 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)", off));
10216 if (pfnRecom) /** @todo stats on this. */
10217 {
10218 off = pfnRecom(pReNative, off, pCallEntry);
10219 STAM_REL_STATS({cRecompiledCalls++;});
10220 }
10221 else
10222 {
10223 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10224 STAM_REL_STATS({cThreadedCalls++;});
10225 }
10226 Assert(off <= pReNative->cInstrBufAlloc);
10227 Assert(pReNative->cCondDepth == 0);
10228
10229#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10230 if (LogIs2Enabled())
10231 {
10232 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10233# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10234 static const char s_achState[] = "CUXI";
10235# else
10236 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
10237# endif
10238
10239 char szGpr[17];
10240 for (unsigned i = 0; i < 16; i++)
10241 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10242 szGpr[16] = '\0';
10243
10244 char szSegBase[X86_SREG_COUNT + 1];
10245 char szSegLimit[X86_SREG_COUNT + 1];
10246 char szSegAttrib[X86_SREG_COUNT + 1];
10247 char szSegSel[X86_SREG_COUNT + 1];
10248 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10249 {
10250 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10251 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10252 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10253 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10254 }
10255 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10256 = szSegSel[X86_SREG_COUNT] = '\0';
10257
10258 char szEFlags[8];
10259 for (unsigned i = 0; i < 7; i++)
10260 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10261 szEFlags[7] = '\0';
10262
10263 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10264 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10265 }
10266#endif
10267
10268 /*
10269 * Advance.
10270 */
10271 pCallEntry++;
10272#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10273 idxCurCall++;
10274#endif
10275 }
10276
10277 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10278 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10279 if (!cThreadedCalls)
10280 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10281
10282#ifdef VBOX_WITH_STATISTICS
10283 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
10284#endif
10285
10286 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
10287 off = iemNativeRegFlushPendingWrites(pReNative, off);
10288
10289 /*
10290 * Successful return, so clear the return register (eax, w0).
10291 */
10292 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
10293
10294#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10295 /*
10296 * Emit the epilog code.
10297 */
10298 uint32_t idxReturnLabel;
10299 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
10300#else
10301 /*
10302 * Jump to the common per-chunk epilog code.
10303 */
10304 //off = iemNativeEmitBrk(pReNative, off, 0x1227);
10305 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_Return);
10306#endif
10307
10308#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10309 /*
10310 * Generate special jump labels.
10311 */
10312 off = iemNativeEmitRcFiddling(pReNative, off, idxReturnLabel);
10313
10314 bool const fReturnBreakViaLookup = RT_BOOL( pReNative->bmLabelTypes
10315 & ( RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookup)
10316 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithIrq)
10317 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlb)
10318 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq)));
10319 if (fReturnBreakViaLookup)
10320 {
10321 uint32_t const idxReturnBreakLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
10322 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10323 off = iemNativeEmitReturnBreakViaLookup(pReNative, off, idxReturnBreakLabel);
10324 }
10325 else if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
10326 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10327
10328 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreakFF))
10329 off = iemNativeEmitReturnBreakFF(pReNative, off, idxReturnLabel);
10330
10331 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
10332 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
10333
10334 /*
10335      * Generate simple TB tail labels that just call a helper with a pVCpu
10336      * arg and either return or longjmp/throw a non-zero status.
10337 *
10338 * The array entries must be ordered by enmLabel value so we can index
10339 * using fTailLabels bit numbers.
10340 */
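          /* Illustrative example: if the TB only needed the RaiseGp0 and ObsoleteTb
           * labels, fTailLabels below gets exactly those two bits set and the loop
           * emits two stubs, using each bit number (the enum value) to index
           * g_aSimpleTailLabels. */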
10341 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
10342 static struct
10343 {
10344 IEMNATIVELABELTYPE enmLabel;
10345 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
10346 } const g_aSimpleTailLabels[] =
10347 {
10348 { kIemNativeLabelType_Invalid, NULL },
10349 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
10350 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
10351 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
10352 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
10353 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
10354 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
10355 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
10356 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
10357 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
10358 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
10359 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
10360 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
10361 };
10362
10363 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
10364 AssertCompile(kIemNativeLabelType_Invalid == 0);
10365 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
10366 if (fTailLabels)
10367 {
10368 do
10369 {
10370 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10371 fTailLabels &= ~RT_BIT_64(enmLabel);
10372 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
10373
10374 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10375 Assert(idxLabel != UINT32_MAX);
10376 if (idxLabel != UINT32_MAX)
10377 {
10378 iemNativeLabelDefine(pReNative, idxLabel, off);
10379
10380 /* int pfnCallback(PVMCPUCC pVCpu) */
10381 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10382 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
10383
10384 /* jump back to the return sequence. */
10385 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
10386 }
10387
10388 } while (fTailLabels);
10389 }
10390
10391#else /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
10392 /*
10393 * Generate tail labels with jumps to the common per-chunk code.
10394 */
10395# ifndef RT_ARCH_AMD64
10396 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_Return) | RT_BIT_64(kIemNativeLabelType_Invalid))));
10397 AssertCompile(kIemNativeLabelType_Invalid == 0);
10398 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1U) - 2U);
10399 if (fTailLabels)
10400 {
10401 do
10402 {
10403 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10404 fTailLabels &= ~RT_BIT_64(enmLabel);
10405
10406 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10407 AssertContinue(idxLabel != UINT32_MAX);
10408 iemNativeLabelDefine(pReNative, idxLabel, off);
10409 off = iemNativeEmitTbExit(pReNative, off, enmLabel);
10410 } while (fTailLabels);
10411 }
10412# else
10413 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1) - 1U))); /* Should not be used! */
10414# endif
10415#endif /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
10416 }
10417 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10418 {
10419 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10420 return pTb;
10421 }
10422 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10423 Assert(off <= pReNative->cInstrBufAlloc);
10424
10425 /*
10426      * Make sure all labels have been defined.
10427 */
10428 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10429#ifdef VBOX_STRICT
10430 uint32_t const cLabels = pReNative->cLabels;
10431 for (uint32_t i = 0; i < cLabels; i++)
10432 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10433#endif
10434
10435#if 0 /* For profiling the native recompiler code. */
10436 if (pTb->Thrd.cCalls >= 136)
10437 {
10438 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10439 goto l_profile_again;
10440 }
10441#endif
10442
10443 /*
10444 * Allocate executable memory, copy over the code we've generated.
10445 */
10446 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10447 if (pTbAllocator->pDelayedFreeHead)
10448 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10449
10450 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
10451#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10452 PCIEMNATIVEPERCHUNKCTX pCtx = NULL;
10453 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10454 &paFinalInstrBufRx, &pCtx);
10455
10456#else
10457 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10458 &paFinalInstrBufRx, NULL);
10459#endif
10460 AssertReturn(paFinalInstrBuf, pTb);
10461 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10462
10463 /*
10464 * Apply fixups.
10465 */
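          /* Each fixup record holds an instruction buffer offset, a label index and
           * an addend.  For the AMD64 Rel32 case below, for instance, the 32-bit
           * field at paFinalInstrBuf[off] receives (labelOff - off + addend), a
           * displacement counted in IEMNATIVEINSTR units (bytes on AMD64, 32-bit
           * instruction words for the ARM64 branch variants). */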
10466 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10467 uint32_t const cFixups = pReNative->cFixups;
10468 for (uint32_t i = 0; i < cFixups; i++)
10469 {
10470 Assert(paFixups[i].off < off);
10471 Assert(paFixups[i].idxLabel < cLabels);
10472 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10473 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10474 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10475 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10476 switch (paFixups[i].enmType)
10477 {
10478#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10479 case kIemNativeFixupType_Rel32:
10480 Assert(paFixups[i].off + 4 <= off);
10481 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10482 continue;
10483
10484#elif defined(RT_ARCH_ARM64)
10485 case kIemNativeFixupType_RelImm26At0:
10486 {
10487 Assert(paFixups[i].off < off);
10488 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10489 Assert(offDisp >= -33554432 && offDisp < 33554432);
10490 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10491 continue;
10492 }
10493
10494 case kIemNativeFixupType_RelImm19At5:
10495 {
10496 Assert(paFixups[i].off < off);
10497 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10498 Assert(offDisp >= -262144 && offDisp < 262144);
10499 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10500 continue;
10501 }
10502
10503 case kIemNativeFixupType_RelImm14At5:
10504 {
10505 Assert(paFixups[i].off < off);
10506 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10507 Assert(offDisp >= -8192 && offDisp < 8192);
10508 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10509 continue;
10510 }
10511
10512#endif
10513 case kIemNativeFixupType_Invalid:
10514 case kIemNativeFixupType_End:
10515 break;
10516 }
10517 AssertFailed();
10518 }
10519
10520#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10521 /*
10522 * Apply TB exit fixups.
10523 */
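          /* Unlike the label fixups above, these patch branches that leave the TB
           * and land in the per-chunk common code, so the branch target comes from
           * pCtx->apExitLabels (addresses in the RX mapping) rather than from the
           * TB's own label table. */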
10524 PIEMNATIVEEXITFIXUP const paTbExitFixups = pReNative->paTbExitFixups;
10525 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
10526 for (uint32_t i = 0; i < cTbExitFixups; i++)
10527 {
10528 Assert(paTbExitFixups[i].off < off);
10529 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(paTbExitFixups[i].enmExitReason));
10530 RTPTRUNION const Ptr = { &paFinalInstrBuf[paTbExitFixups[i].off] };
10531
10532# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10533 Assert(paTbExitFixups[i].off + 4 <= off);
10534 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off + 4];
10535 Assert(offDisp >= INT32_MIN && offDisp <= INT32_MAX);
10536 *Ptr.pi32 = (int32_t)offDisp;
10537
10538# elif defined(RT_ARCH_ARM64)
10539 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off];
10540 Assert(offDisp >= -33554432 && offDisp < 33554432);
10541 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10542
10543# else
10544# error "Port me!"
10545# endif
10546 }
10547#endif
10548
10549 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
10550 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10551
10552 /*
10553 * Convert the translation block.
10554 */
10555 RTMemFree(pTb->Thrd.paCalls);
10556 pTb->Native.paInstructions = paFinalInstrBufRx;
10557 pTb->Native.cInstructions = off;
10558 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10559#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10560 pReNative->pDbgInfo->FlatPc = pTb->FlatPc;
10561     pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10562 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10563#endif
10564
10565 Assert(pTbAllocator->cThreadedTbs > 0);
10566 pTbAllocator->cThreadedTbs -= 1;
10567 pTbAllocator->cNativeTbs += 1;
10568 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10569
10570#ifdef LOG_ENABLED
10571 /*
10572 * Disassemble to the log if enabled.
10573 */
10574 if (LogIs3Enabled())
10575 {
10576 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10577 iemNativeDisassembleTb(pVCpu, pTb, DBGFR3InfoLogHlp());
10578# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10579 RTLogFlush(NULL);
10580# endif
10581 }
10582#endif
10583 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10584
10585 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10586 return pTb;
10587}
10588