VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 106117

Last change on this file since 106117 was 106117, checked in by vboxsync, 2 months ago

VMM/IEM: Recompilation-time checks of skipped EFLAGS. bugref:10720

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 461.0 KB
1/* $Id: IEMAllN8veRecompiler.cpp 106117 2024-09-23 13:59:08Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : Delayed PC updating.
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include <VBox/vmm/tm.h>
55#include "IEMInternal.h"
56#include <VBox/vmm/vmcc.h>
57#include <VBox/log.h>
58#include <VBox/err.h>
59#include <VBox/dis.h>
60#include <VBox/param.h>
61#include <iprt/assert.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
71# include "/opt/local/include/capstone/capstone.h"
72#endif
73
74#include "IEMInline.h"
75#include "IEMThreadedFunctions.h"
76#include "IEMN8veRecompiler.h"
77#include "IEMN8veRecompilerEmit.h"
78#include "IEMN8veRecompilerTlbLookup.h"
79#include "IEMNativeFunctions.h"
80
81
82/*
83 * Narrow down the configs here to avoid wasting time on unused configs.
84 * Note! Same checks in IEMAllThrdRecompiler.cpp.
85 */
86
87#ifndef IEM_WITH_CODE_TLB
88# error The code TLB must be enabled for the recompiler.
89#endif
90
91#ifndef IEM_WITH_DATA_TLB
92# error The data TLB must be enabled for the recompiler.
93#endif
94
95#ifndef IEM_WITH_SETJMP
96# error The setjmp approach must be enabled for the recompiler.
97#endif
98
99/** @todo eliminate this clang build hack. */
100#if RT_CLANG_PREREQ(4, 0)
101# pragma GCC diagnostic ignored "-Wunused-function"
102#endif
103
104
105/*********************************************************************************************************************************
106* Internal Functions *
107*********************************************************************************************************************************/
108#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
109static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
110#endif
111DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
112DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
113 IEMNATIVEGSTREG enmGstReg, uint32_t off);
114DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
115static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode = false);
116
117
118
119/*********************************************************************************************************************************
120* Native Recompilation *
121*********************************************************************************************************************************/
122
123
124/**
125 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
126 */
127IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
128{
129 pVCpu->iem.s.cInstructions += idxInstr;
130 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
131}
132
133
134/**
135 * Helping iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb.
136 */
137DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
138{
139 uint64_t fCpu = pVCpu->fLocalForcedActions;
140 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
141 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
142 | VMCPU_FF_TLB_FLUSH
143 | VMCPU_FF_UNHALT );
144 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
145 if (RT_LIKELY( ( !fCpu
146 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
147 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
148 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
149 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
150 return false;
151 return true;
152}
153
154
155/**
156 * Used by TB code to try link directly to the next TB via the TB lookup table entry and the given physical PC, returning the next TB's native code address on success or 0 to take the regular break path.
157 */
158template <bool const a_fWithIrqCheck>
159IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
160 uint32_t fFlags, RTGCPHYS GCPhysPc))
161{
162 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
163 Assert(idxTbLookup < pTb->cTbLookupEntries);
164 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
165#if 1
166 PIEMTB const pNewTb = *ppNewTb;
167 if (pNewTb)
168 {
169# ifdef VBOX_STRICT
170 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
171 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
172 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
173 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
174 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
175 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
176# endif
177 if (pNewTb->GCPhysPc == GCPhysPc)
178 {
179# ifdef VBOX_STRICT
180 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
181 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
182 fAssertFlags |= IEMTB_F_INHIBIT_SHADOW;
183 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
184 fAssertFlags |= IEMTB_F_INHIBIT_NMI;
185# if 1 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. */
186 Assert(IEM_F_MODE_X86_IS_FLAT(fFlags));
187# else
188 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
189 {
190 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
191 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
192 fAssertFlags |= IEMTB_F_CS_LIM_CHECKS;
193 }
194# endif
195 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
196 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
197 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
198#endif
199
200 /*
201 * Check them + type.
202 */
203 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
204 {
205 /*
206 * Check for interrupts and stuff.
207 */
208 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
209 * The main problems are the statistics and, to some degree, the logging. :/ */
210 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
211 {
212 /* Do polling. */
213 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
214 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
215 {
216 /*
217 * Success. Update statistics and switch to the next TB.
218 */
219 if (a_fWithIrqCheck)
220 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
221 else
222 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
223
224 pNewTb->cUsed += 1;
225 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
226 pVCpu->iem.s.pCurTbR3 = pNewTb;
227 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
228 pVCpu->iem.s.cTbExecNative += 1;
229 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
230 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
231 return (uintptr_t)pNewTb->Native.paInstructions;
232 }
233 }
234 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
235 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
236 }
237 else
238 {
239 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
240 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
241 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
242 }
243 }
244 else
245 {
246 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
247 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
248 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
249 }
250 }
251 else
252 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
253#else
254 NOREF(GCPhysPc);
255#endif
256
257 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
258 return 0;
259}
260
261
262/**
263 * Used by TB code to try link directly to the next TB via the TB lookup table entry, doing the code TLB lookup itself to get the physical PC, returning the next TB's native code address on success or 0 to take the regular break path.
264 */
265template <bool const a_fWithIrqCheck>
266IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
267{
268 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
269 Assert(idxTbLookup < pTb->cTbLookupEntries);
270 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
271#if 1
272 PIEMTB const pNewTb = *ppNewTb;
273 if (pNewTb)
274 {
275 /*
276 * Calculate the flags for the next TB and check if they match.
277 */
278 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
279 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
280 { /* likely */ }
281 else
282 {
283 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
284 fFlags |= IEMTB_F_INHIBIT_SHADOW;
285 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
286 fFlags |= IEMTB_F_INHIBIT_NMI;
287 }
288 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
289 {
290 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
291 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
292 { /* likely */ }
293 else
294 fFlags |= IEMTB_F_CS_LIM_CHECKS;
295 }
296 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
297
298 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
299 {
300 /*
301 * Do the TLB lookup for flat RIP and compare the result with the next TB.
302 *
303 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
304 */
305 /* Calc the effective PC. */
306 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
307 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
308 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
309
310 /* Advance within the current buffer (PAGE) when possible. */
311 RTGCPHYS GCPhysPc;
312 uint64_t off;
313 if ( pVCpu->iem.s.pbInstrBuf
314 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
315 {
316 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
317 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
318 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
319 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
320 else
321 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
322 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
323 }
324 else
325 {
326 pVCpu->iem.s.pbInstrBuf = NULL;
327 pVCpu->iem.s.offCurInstrStart = 0;
328 pVCpu->iem.s.offInstrNextByte = 0;
329 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
330 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
331 }
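            /* Note: the '(off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal' check
               above relies on unsigned wraparound: when uPc lies below uInstrBufPc the subtraction wraps
               to a huge value and the comparison fails, so a single compare covers both buffer bounds. */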
332
333 if (pNewTb->GCPhysPc == GCPhysPc)
334 {
335 /*
336 * Check for interrupts and stuff.
337 */
338 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
339 * The main problems are the statistics and, to some degree, the logging. :/ */
340 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
341 {
342 /* Do polling. */
343 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
344 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
345 {
346 /*
347 * Success. Update statistics and switch to the next TB.
348 */
349 if (a_fWithIrqCheck)
350 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
351 else
352 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
353
354 pNewTb->cUsed += 1;
355 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
356 pVCpu->iem.s.pCurTbR3 = pNewTb;
357 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
358 pVCpu->iem.s.cTbExecNative += 1;
359 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
360 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
361 return (uintptr_t)pNewTb->Native.paInstructions;
362 }
363 }
364 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
365 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
366 }
367 else
368 {
369 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
370 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
371 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
372 }
373 }
374 else
375 {
376 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
377 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
378 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
379 }
380 }
381 else
382 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
383#else
384 NOREF(fFlags);
385 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
386#endif
387
388 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
389 return 0;
390}
391
392
393/**
394 * Used by TB code when it wants to raise a \#DE.
395 */
396IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
397{
398 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
399 iemRaiseDivideErrorJmp(pVCpu);
400#ifndef _MSC_VER
401 return VINF_IEM_RAISED_XCPT; /* not reached */
402#endif
403}
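/* Note: the iemRaise*Jmp helpers used by the raise helpers in this group do not return (with
   IEM_WITH_SETJMP they longjmp back to the executor), so the VINF_IEM_RAISED_XCPT return
   statements only keep compilers happy that would otherwise warn about a missing return value;
   MSC is excluded, presumably because it would instead flag the statement as unreachable. */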
404
405
406/**
407 * Used by TB code when it wants to raise a \#UD.
408 */
409IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
410{
411 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
412 iemRaiseUndefinedOpcodeJmp(pVCpu);
413#ifndef _MSC_VER
414 return VINF_IEM_RAISED_XCPT; /* not reached */
415#endif
416}
417
418
419/**
420 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
421 *
422 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
423 */
424IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
425{
426 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
427 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
428 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
429 iemRaiseUndefinedOpcodeJmp(pVCpu);
430 else
431 iemRaiseDeviceNotAvailableJmp(pVCpu);
432#ifndef _MSC_VER
433 return VINF_IEM_RAISED_XCPT; /* not reached */
434#endif
435}
436
437
438/**
439 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
440 *
441 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
442 */
443IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
444{
445 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
446 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
447 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
448 iemRaiseUndefinedOpcodeJmp(pVCpu);
449 else
450 iemRaiseDeviceNotAvailableJmp(pVCpu);
451#ifndef _MSC_VER
452 return VINF_IEM_RAISED_XCPT; /* not reached */
453#endif
454}
455
456
457/**
458 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
459 *
460 * See IEM_MC_CALL_AVX_XXX/IEM_MC_CALL_SSE_XXX.
461 */
462IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
463{
464 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
465 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
466 iemRaiseSimdFpExceptionJmp(pVCpu);
467 else
468 iemRaiseUndefinedOpcodeJmp(pVCpu);
469#ifndef _MSC_VER
470 return VINF_IEM_RAISED_XCPT; /* not reached */
471#endif
472}
473
474
475/**
476 * Used by TB code when it wants to raise a \#NM.
477 */
478IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
479{
480 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
481 iemRaiseDeviceNotAvailableJmp(pVCpu);
482#ifndef _MSC_VER
483 return VINF_IEM_RAISED_XCPT; /* not reached */
484#endif
485}
486
487
488/**
489 * Used by TB code when it wants to raise a \#GP(0).
490 */
491IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
492{
493 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
494 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
495#ifndef _MSC_VER
496 return VINF_IEM_RAISED_XCPT; /* not reached */
497#endif
498}
499
500
501/**
502 * Used by TB code when it wants to raise a \#MF.
503 */
504IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
505{
506 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
507 iemRaiseMathFaultJmp(pVCpu);
508#ifndef _MSC_VER
509 return VINF_IEM_RAISED_XCPT; /* not reached */
510#endif
511}
512
513
514/**
515 * Used by TB code when it wants to raise a \#XF.
516 */
517IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
518{
519 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
520 iemRaiseSimdFpExceptionJmp(pVCpu);
521#ifndef _MSC_VER
522 return VINF_IEM_RAISED_XCPT; /* not reached */
523#endif
524}
525
526
527/**
528 * Used by TB code when detecting opcode changes.
529 * @see iemThreadeFuncWorkerObsoleteTb
530 */
531IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
532{
533 /* We set fSafeToFree to false because we're being called in the context
534 of a TB callback function, which for native TBs means we cannot release
535 the executable memory until we've returned all the way back to iemTbExec, as
536 that return path goes via the native code generated for the TB. */
537 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
538 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
539 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
540 return VINF_IEM_REEXEC_BREAK;
541}
542
543
544/**
545 * Used by TB code when we need to switch to a TB with CS.LIM checking.
546 */
547IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
548{
549 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
550 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
551 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
552 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
553 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
554 return VINF_IEM_REEXEC_BREAK;
555}
556
557
558/**
559 * Used by TB code when we missed a PC check after a branch.
560 */
561IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
562{
563 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
564 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
565 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
566 pVCpu->iem.s.pbInstrBuf));
567 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
568 return VINF_IEM_REEXEC_BREAK;
569}
570
571
572
573/*********************************************************************************************************************************
574* Helpers: Segmented memory fetches and stores. *
575*********************************************************************************************************************************/
576
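/* Note: each of the helpers below simply forwards to the corresponding iemMem* accessor.  When
   IEMNATIVE_WITH_TLB_LOOKUP_FETCH (and the _STORE/_PUSH/_POP siblings further down) is defined,
   the emitted TB code presumably performs the TLB lookup inline and only calls the helper on the
   slow path, which is why the *SafeJmp variants are used in that configuration; this is an
   assumption based on the configuration names, the emitter side is not part of this excerpt. */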
577/**
578 * Used by TB code to load unsigned 8-bit data w/ segmentation.
579 */
580IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
581{
582#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
583 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
584#else
585 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
586#endif
587}
588
589
590/**
591 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
592 * to 16 bits.
593 */
594IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
595{
596#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
597 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
598#else
599 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
600#endif
601}
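/* Note: the cast chain above first sign-extends the byte and then zero-extends the result to
   64 bits, e.g. (uint64_t)(uint16_t)(int16_t)(int8_t)0x80 yields 0x000000000000FF80, so the
   caller gets an already widened 16-bit value in the 64-bit return register.  The _Sx_U32 and
   _Sx_U64 variants below work the same way with wider intermediate types. */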
602
603
604/**
605 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
606 * to 32 bits.
607 */
608IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
609{
610#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
611 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
612#else
613 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
614#endif
615}
616
617/**
618 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
619 * to 64 bits.
620 */
621IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
622{
623#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
624 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
625#else
626 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
627#endif
628}
629
630
631/**
632 * Used by TB code to load unsigned 16-bit data w/ segmentation.
633 */
634IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
635{
636#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
637 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
638#else
639 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
640#endif
641}
642
643
644/**
645 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
646 * to 32 bits.
647 */
648IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
649{
650#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
651 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
652#else
653 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
654#endif
655}
656
657
658/**
659 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
660 * to 64 bits.
661 */
662IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
663{
664#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
665 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
666#else
667 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
668#endif
669}
670
671
672/**
673 * Used by TB code to load unsigned 32-bit data w/ segmentation.
674 */
675IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
676{
677#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
678 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
679#else
680 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
681#endif
682}
683
684
685/**
686 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
687 * to 64 bits.
688 */
689IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
690{
691#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
692 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
693#else
694 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
695#endif
696}
697
698
699/**
700 * Used by TB code to load unsigned 64-bit data w/ segmentation.
701 */
702IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
703{
704#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
705 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
706#else
707 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
708#endif
709}
710
711
712#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
713/**
714 * Used by TB code to load 128-bit data w/ segmentation.
715 */
716IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
717{
718#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
719 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
720#else
721 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
722#endif
723}
724
725
726/**
727 * Used by TB code to load 128-bit data w/ segmentation.
728 */
729IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
730{
731#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
732 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
733#else
734 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
735#endif
736}
737
738
739/**
740 * Used by TB code to load 128-bit data w/ segmentation.
741 */
742IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
743{
744#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
745 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
746#else
747 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
748#endif
749}
750
751
752/**
753 * Used by TB code to load 256-bit data w/ segmentation.
754 */
755IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
756{
757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
758 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
759#else
760 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
761#endif
762}
763
764
765/**
766 * Used by TB code to load 256-bit data w/ segmentation.
767 */
768IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
769{
770#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
771 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
772#else
773 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
774#endif
775}
776#endif
777
778
779/**
780 * Used by TB code to store unsigned 8-bit data w/ segmentation.
781 */
782IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
783{
784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
785 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
786#else
787 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
788#endif
789}
790
791
792/**
793 * Used by TB code to store unsigned 16-bit data w/ segmentation.
794 */
795IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
796{
797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
798 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
799#else
800 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
801#endif
802}
803
804
805/**
806 * Used by TB code to store unsigned 32-bit data w/ segmentation.
807 */
808IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
809{
810#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
811 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
812#else
813 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
814#endif
815}
816
817
818/**
819 * Used by TB code to store unsigned 64-bit data w/ segmentation.
820 */
821IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
822{
823#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
824 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
825#else
826 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
827#endif
828}
829
830
831#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
832/**
833 * Used by TB code to store unsigned 128-bit data w/ segmentation.
834 */
835IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
836{
837#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
838 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
839#else
840 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
841#endif
842}
843
844
845/**
846 * Used by TB code to store unsigned 128-bit data w/ segmentation.
847 */
848IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
849{
850#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
851 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
852#else
853 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
854#endif
855}
856
857
858/**
859 * Used by TB code to store unsigned 256-bit data w/ segmentation.
860 */
861IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
862{
863#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
864 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
865#else
866 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
867#endif
868}
869
870
871/**
872 * Used by TB code to store unsigned 256-bit data w/ segmentation.
873 */
874IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
875{
876#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
877 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
878#else
879 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
880#endif
881}
882#endif
883
884
885
886/**
887 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
888 */
889IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
890{
891#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
892 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
893#else
894 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
895#endif
896}
897
898
899/**
900 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
901 */
902IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
903{
904#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
905 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
906#else
907 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
908#endif
909}
910
911
912/**
913 * Used by TB code to store a 32-bit selector value onto a generic stack.
914 *
915 * Intel CPUs don't write the whole dword (only the selector word is written), hence this special function.
916 */
917IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
918{
919#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
920 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
921#else
922 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
923#endif
924}
925
926
927/**
928 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
929 */
930IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
931{
932#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
933 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
934#else
935 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
936#endif
937}
938
939
940/**
941 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
942 */
943IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
944{
945#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
946 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
947#else
948 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
949#endif
950}
951
952
953/**
954 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
955 */
956IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
957{
958#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
959 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
960#else
961 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
962#endif
963}
964
965
966/**
967 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
968 */
969IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
970{
971#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
972 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
973#else
974 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
975#endif
976}
977
978
979
980/*********************************************************************************************************************************
981* Helpers: Flat memory fetches and stores. *
982*********************************************************************************************************************************/
983
984/**
985 * Used by TB code to load unsigned 8-bit data w/ flat address.
986 * @note Zero extending the value to 64-bit to simplify assembly.
987 */
988IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
989{
990#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
991 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
992#else
993 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
994#endif
995}
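/* Note: passing UINT8_MAX as the segment register index to the shared *SafeJmp workers is how
   these flat helpers request an access with no segmentation applied; the same convention is
   used by all the flat fetch/store helpers below. */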
996
997
998/**
999 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1000 * to 16 bits.
1001 * @note Zero extending the value to 64-bit to simplify assembly.
1002 */
1003IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1004{
1005#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1006 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1007#else
1008 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1009#endif
1010}
1011
1012
1013/**
1014 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1015 * to 32 bits.
1016 * @note Zero extending the value to 64-bit to simplify assembly.
1017 */
1018IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1019{
1020#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1021 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1022#else
1023 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1024#endif
1025}
1026
1027
1028/**
1029 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1030 * to 64 bits.
1031 */
1032IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1033{
1034#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1035 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1036#else
1037 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1038#endif
1039}
1040
1041
1042/**
1043 * Used by TB code to load unsigned 16-bit data w/ flat address.
1044 * @note Zero extending the value to 64-bit to simplify assembly.
1045 */
1046IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1047{
1048#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1049 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1050#else
1051 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1052#endif
1053}
1054
1055
1056/**
1057 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1058 * to 32 bits.
1059 * @note Zero extending the value to 64-bit to simplify assembly.
1060 */
1061IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1062{
1063#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1064 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1065#else
1066 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1067#endif
1068}
1069
1070
1071/**
1072 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1073 * to 64 bits.
1074 * @note Zero extending the value to 64-bit to simplify assembly.
1075 */
1076IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1077{
1078#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1079 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1080#else
1081 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1082#endif
1083}
1084
1085
1086/**
1087 * Used by TB code to load unsigned 32-bit data w/ flat address.
1088 * @note Zero extending the value to 64-bit to simplify assembly.
1089 */
1090IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1091{
1092#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1093 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1094#else
1095 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1096#endif
1097}
1098
1099
1100/**
1101 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1102 * to 64 bits.
1103 * @note Zero extending the value to 64-bit to simplify assembly.
1104 */
1105IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1106{
1107#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1108 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1109#else
1110 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1111#endif
1112}
1113
1114
1115/**
1116 * Used by TB code to load unsigned 64-bit data w/ flat address.
1117 */
1118IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1119{
1120#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1121 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1122#else
1123 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1124#endif
1125}
1126
1127
1128#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1129/**
1130 * Used by TB code to load unsigned 128-bit data w/ flat address.
1131 */
1132IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1133{
1134#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1135 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1136#else
1137 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1138#endif
1139}
1140
1141
1142/**
1143 * Used by TB code to load unsigned 128-bit data w/ flat address.
1144 */
1145IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1146{
1147#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1148 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1149#else
1150 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1151#endif
1152}
1153
1154
1155/**
1156 * Used by TB code to load unsigned 128-bit data w/ flat address.
1157 */
1158IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1159{
1160#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1161 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1162#else
1163 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1164#endif
1165}
1166
1167
1168/**
1169 * Used by TB code to load unsigned 256-bit data w/ flat address.
1170 */
1171IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1172{
1173#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1174 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1175#else
1176 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1177#endif
1178}
1179
1180
1181/**
1182 * Used by TB code to load unsigned 256-bit data w/ flat address.
1183 */
1184IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1185{
1186#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1187 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1188#else
1189 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1190#endif
1191}
1192#endif
1193
1194
1195/**
1196 * Used by TB code to store unsigned 8-bit data w/ flat address.
1197 */
1198IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1199{
1200#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1201 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1202#else
1203 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1204#endif
1205}
1206
1207
1208/**
1209 * Used by TB code to store unsigned 16-bit data w/ flat address.
1210 */
1211IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1212{
1213#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1214 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1215#else
1216 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1217#endif
1218}
1219
1220
1221/**
1222 * Used by TB code to store unsigned 32-bit data w/ flat address.
1223 */
1224IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1225{
1226#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1227 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1228#else
1229 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1230#endif
1231}
1232
1233
1234/**
1235 * Used by TB code to store unsigned 64-bit data w/ flat address.
1236 */
1237IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1238{
1239#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1240 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1241#else
1242 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1243#endif
1244}
1245
1246
1247#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1248/**
1249 * Used by TB code to store unsigned 128-bit data w/ flat address.
1250 */
1251IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1252{
1253#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1254 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1255#else
1256 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1257#endif
1258}
1259
1260
1261/**
1262 * Used by TB code to store unsigned 128-bit data w/ flat address.
1263 */
1264IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1265{
1266#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1267 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1268#else
1269 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1270#endif
1271}
1272
1273
1274/**
1275 * Used by TB code to store unsigned 256-bit data w/ flat address.
1276 */
1277IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1278{
1279#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1280 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1281#else
1282 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1283#endif
1284}
1285
1286
1287/**
1288 * Used by TB code to store unsigned 256-bit data w/ flat address.
1289 */
1290IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1291{
1292#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1293 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1294#else
1295 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1296#endif
1297}
1298#endif
1299
1300
1301
1302/**
1303 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1304 */
1305IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1306{
1307#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1308 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1309#else
1310 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1311#endif
1312}
1313
1314
1315/**
1316 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1317 */
1318IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1319{
1320#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1321 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1322#else
1323 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1324#endif
1325}
1326
1327
1328/**
1329 * Used by TB code to store a segment selector value onto a flat stack.
1330 *
1331 * Intel CPUs don't write the whole dword (only the selector word is written), hence this special function.
1332 */
1333IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1334{
1335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1336 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1337#else
1338 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1339#endif
1340}
1341
1342
1343/**
1344 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1345 */
1346IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1347{
1348#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1349 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1350#else
1351 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1352#endif
1353}
1354
1355
1356/**
1357 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1358 */
1359IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1360{
1361#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1362 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1363#else
1364 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1365#endif
1366}
1367
1368
1369/**
1370 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1371 */
1372IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1373{
1374#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1375 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1376#else
1377 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1378#endif
1379}
1380
1381
1382/**
1383 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1384 */
1385IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1386{
1387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1388 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1389#else
1390 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1391#endif
1392}
1393
1394
1395
1396/*********************************************************************************************************************************
1397* Helpers: Segmented memory mapping. *
1398*********************************************************************************************************************************/
1399
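/* Note: each mapping helper returns a host pointer to the guest data (directly into guest memory
   or, presumably, into a bounce buffer for split/special accesses) and records the information
   needed to commit and unmap the access in *pbUnmapInfo, which the TB code hands to the matching
   commit-and-unmap helper later on.  As above, the *SafeJmp variants are the slow-path workers
   used when IEMNATIVE_WITH_TLB_LOOKUP_MAPPED enables the inline TLB lookup. */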
1400/**
1401 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1402 * segmentation.
1403 */
1404IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1405 RTGCPTR GCPtrMem, uint8_t iSegReg))
1406{
1407#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1408 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1409#else
1410 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1411#endif
1412}
1413
1414
1415/**
1416 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1417 */
1418IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1419 RTGCPTR GCPtrMem, uint8_t iSegReg))
1420{
1421#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1422 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1423#else
1424 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1425#endif
1426}
1427
1428
1429/**
1430 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1431 */
1432IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1433 RTGCPTR GCPtrMem, uint8_t iSegReg))
1434{
1435#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1436 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1437#else
1438 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1439#endif
1440}
1441
1442
1443/**
1444 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1445 */
1446IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1447 RTGCPTR GCPtrMem, uint8_t iSegReg))
1448{
1449#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1450 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1451#else
1452 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1453#endif
1454}
1455
1456
1457/**
1458 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1459 * segmentation.
1460 */
1461IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1462 RTGCPTR GCPtrMem, uint8_t iSegReg))
1463{
1464#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1465 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1466#else
1467 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1468#endif
1469}
1470
1471
1472/**
1473 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1474 */
1475IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1476 RTGCPTR GCPtrMem, uint8_t iSegReg))
1477{
1478#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1479 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1480#else
1481 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1482#endif
1483}
1484
1485
1486/**
1487 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1488 */
1489IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1490 RTGCPTR GCPtrMem, uint8_t iSegReg))
1491{
1492#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1493 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1494#else
1495 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1496#endif
1497}
1498
1499
1500/**
1501 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1502 */
1503IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1504 RTGCPTR GCPtrMem, uint8_t iSegReg))
1505{
1506#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1507 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1508#else
1509 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1510#endif
1511}
1512
1513
1514/**
1515 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1516 * segmentation.
1517 */
1518IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1519 RTGCPTR GCPtrMem, uint8_t iSegReg))
1520{
1521#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1522 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1523#else
1524 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1525#endif
1526}
1527
1528
1529/**
1530 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1531 */
1532IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1533 RTGCPTR GCPtrMem, uint8_t iSegReg))
1534{
1535#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1536 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1537#else
1538 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1539#endif
1540}
1541
1542
1543/**
1544 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1545 */
1546IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1547 RTGCPTR GCPtrMem, uint8_t iSegReg))
1548{
1549#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1550 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1551#else
1552 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1553#endif
1554}
1555
1556
1557/**
1558 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1559 */
1560IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1561 RTGCPTR GCPtrMem, uint8_t iSegReg))
1562{
1563#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1564 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1565#else
1566 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1567#endif
1568}
1569
1570
1571/**
1572 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1573 * segmentation.
1574 */
1575IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1576 RTGCPTR GCPtrMem, uint8_t iSegReg))
1577{
1578#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1579 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1580#else
1581 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1582#endif
1583}
1584
1585
1586/**
1587 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1588 */
1589IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1590 RTGCPTR GCPtrMem, uint8_t iSegReg))
1591{
1592#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1593 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1594#else
1595 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1596#endif
1597}
1598
1599
1600/**
1601 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1602 */
1603IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1604 RTGCPTR GCPtrMem, uint8_t iSegReg))
1605{
1606#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1607 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1608#else
1609 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1610#endif
1611}
1612
1613
1614/**
1615 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1616 */
1617IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1618 RTGCPTR GCPtrMem, uint8_t iSegReg))
1619{
1620#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1621 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1622#else
1623 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1624#endif
1625}
1626
1627
1628/**
1629 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1630 */
1631IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1632 RTGCPTR GCPtrMem, uint8_t iSegReg))
1633{
1634#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1635 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1636#else
1637 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1638#endif
1639}
1640
1641
1642/**
1643 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1644 */
1645IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1646 RTGCPTR GCPtrMem, uint8_t iSegReg))
1647{
1648#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1649 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1650#else
1651 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1652#endif
1653}
1654
1655
1656/**
1657 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1658 * segmentation.
1659 */
1660IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1661 RTGCPTR GCPtrMem, uint8_t iSegReg))
1662{
1663#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1664 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1665#else
1666 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1667#endif
1668}
1669
1670
1671/**
1672 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1673 */
1674IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1675 RTGCPTR GCPtrMem, uint8_t iSegReg))
1676{
1677#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1678 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1679#else
1680 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1681#endif
1682}
1683
1684
1685/**
1686 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1687 */
1688IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1689 RTGCPTR GCPtrMem, uint8_t iSegReg))
1690{
1691#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1692 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1693#else
1694 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1695#endif
1696}
1697
1698
1699/**
1700 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1701 */
1702IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1703 RTGCPTR GCPtrMem, uint8_t iSegReg))
1704{
1705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1706 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1707#else
1708 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1709#endif
1710}
1711
1712
1713/*********************************************************************************************************************************
1714* Helpers: Flat memory mapping. *
1715*********************************************************************************************************************************/
1716
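/*
 * Note: The flat variants below pass UINT8_MAX as the segment register index to
 *       the common (safe) mapping code, i.e. GCPtrMem is treated as an already
 *       linear address with no segment base/limit applied.
 */
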
1717/**
1718 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1719 * address.
1720 */
1721IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1722{
1723#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1724 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1725#else
1726 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1727#endif
1728}
1729
1730
1731/**
1732 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1733 */
1734IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1735{
1736#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1737 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1738#else
1739 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1740#endif
1741}
1742
1743
1744/**
1745 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1746 */
1747IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1748{
1749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1750 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1751#else
1752 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1753#endif
1754}
1755
1756
1757/**
1758 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1759 */
1760IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1761{
1762#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1763 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1764#else
1765 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1766#endif
1767}
1768
1769
1770/**
1771 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1772 * address.
1773 */
1774IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1775{
1776#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1777 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1778#else
1779 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1780#endif
1781}
1782
1783
1784/**
1785 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1786 */
1787IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1788{
1789#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1790 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1791#else
1792 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1793#endif
1794}
1795
1796
1797/**
1798 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1799 */
1800IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1801{
1802#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1803 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1804#else
1805 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1806#endif
1807}
1808
1809
1810/**
1811 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1812 */
1813IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1814{
1815#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1816 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1817#else
1818 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1819#endif
1820}
1821
1822
1823/**
1824 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1825 * address.
1826 */
1827IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1828{
1829#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1830 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1831#else
1832 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1833#endif
1834}
1835
1836
1837/**
1838 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1839 */
1840IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1841{
1842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1843 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1844#else
1845 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1846#endif
1847}
1848
1849
1850/**
1851 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1852 */
1853IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1854{
1855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1856 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1857#else
1858 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1859#endif
1860}
1861
1862
1863/**
1864 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1865 */
1866IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1867{
1868#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1869 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1870#else
1871 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1872#endif
1873}
1874
1875
1876/**
1877 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1878 * address.
1879 */
1880IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1881{
1882#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1883 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1884#else
1885 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1886#endif
1887}
1888
1889
1890/**
1891 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1892 */
1893IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1894{
1895#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1896 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1897#else
1898 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1899#endif
1900}
1901
1902
1903/**
1904 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1905 */
1906IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1907{
1908#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1909 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1910#else
1911 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1912#endif
1913}
1914
1915
1916/**
1917 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1922 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1923#else
1924 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1925#endif
1926}
1927
1928
1929/**
1930 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1931 */
1932IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1933{
1934#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1935 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1936#else
1937 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1938#endif
1939}
1940
1941
1942/**
1943 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1944 */
1945IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1946{
1947#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1948 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1949#else
1950 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1951#endif
1952}
1953
1954
1955/**
1956 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1957 * address.
1958 */
1959IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1960{
1961#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1962 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1963#else
1964 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1965#endif
1966}
1967
1968
1969/**
1970 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1971 */
1972IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1973{
1974#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1975 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1976#else
1977 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1978#endif
1979}
1980
1981
1982/**
1983 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1984 */
1985IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1986{
1987#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1988 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1989#else
1990 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1991#endif
1992}
1993
1994
1995/**
1996 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1997 */
1998IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1999{
2000#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2001 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2002#else
2003 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2004#endif
2005}
2006
2007
2008/*********************************************************************************************************************************
2009* Helpers: Commit, rollback & unmap *
2010*********************************************************************************************************************************/
2011
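/*
 * Note: These always go through the *SafeJmp variants; the bUnmapInfo byte
 *       handed back by the mapping helpers above is all that is needed to
 *       identify the mapping, so there are no separate flat variants here.
 */
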
2012/**
2013 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2014 */
2015IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2016{
2017 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2018}
2019
2020
2021/**
2022 * Used by TB code to commit and unmap a read-write memory mapping.
2023 */
2024IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2025{
2026 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2027}
2028
2029
2030/**
2031 * Used by TB code to commit and unmap a write-only memory mapping.
2032 */
2033IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2034{
2035 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2036}
2037
2038
2039/**
2040 * Used by TB code to commit and unmap a read-only memory mapping.
2041 */
2042IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2043{
2044 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2045}
2046
2047
2048/**
2049 * Reinitializes the native recompiler state.
2050 *
2051 * Called before starting a new recompile job.
2052 */
2053static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2054{
2055 pReNative->cLabels = 0;
2056 pReNative->bmLabelTypes = 0;
2057 pReNative->cFixups = 0;
2058#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2059 pReNative->cTbExitFixups = 0;
2060#endif
2061#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2062 pReNative->pDbgInfo->cEntries = 0;
2063 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2064#endif
2065 pReNative->pTbOrg = pTb;
2066 pReNative->cCondDepth = 0;
2067 pReNative->uCondSeqNo = 0;
2068 pReNative->uCheckIrqSeqNo = 0;
2069 pReNative->uTlbSeqNo = 0;
2070#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
2071 pReNative->fSkippingEFlags = 0;
2072#endif
2073#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2074 pReNative->fPostponingEFlags = 0;
2075#endif
2076
2077#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2078 pReNative->Core.offPc = 0;
2079# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(VBOX_WITH_STATISTICS)
2080 pReNative->idxInstrPlusOneOfLastPcUpdate = 0;
2081# endif
2082# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2083 pReNative->Core.fDebugPcInitialized = false;
2084# endif
2085#endif
2086#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2087 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2088#endif
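    /* Mark the fixed registers and any bits beyond the number of available host
       GPRs as allocated, so the register allocator never hands them out. */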
2089 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2090#if IEMNATIVE_HST_GREG_COUNT < 32
2091 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2092#endif
2093 ;
2094 pReNative->Core.bmHstRegsWithGstShadow = 0;
2095 pReNative->Core.bmGstRegShadows = 0;
2096#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2097 pReNative->Core.bmGstRegShadowDirty = 0;
2098#endif
2099 pReNative->Core.bmVars = 0;
2100 pReNative->Core.bmStack = 0;
2101 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2102 pReNative->Core.u64ArgVars = UINT64_MAX;
2103
2104 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 23);
2105 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2106 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2107 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2108 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2110 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2111 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2112 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2113 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2114 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2115 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2116 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2117 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2118 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2119 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2120 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2121 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2122 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2123 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2124 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2125 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2126 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2127 pReNative->aidxUniqueLabels[22] = UINT32_MAX;
2128
2129 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2130
2131 /* Full host register reinit: */
2132 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2133 {
2134 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2135 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2136 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2137 }
2138
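    /* Pre-mark whatever is left in the fixed register mask (i.e. fixed registers
       without a dedicated role assigned right below) as reserved. */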
2139 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2140 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2141#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2142 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2143#endif
2144#ifdef IEMNATIVE_REG_FIXED_TMP0
2145 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2146#endif
2147#ifdef IEMNATIVE_REG_FIXED_TMP1
2148 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2149#endif
2150#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2151 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2152#endif
2153 );
2154 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2155 {
2156 fRegs &= ~RT_BIT_32(idxReg);
2157 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2158 }
2159
2160 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2161#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2162 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2163#endif
2164#ifdef IEMNATIVE_REG_FIXED_TMP0
2165 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2166#endif
2167#ifdef IEMNATIVE_REG_FIXED_TMP1
2168 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2169#endif
2170#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2171 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2172#endif
2173
2174#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2175 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2176# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2177 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2178# endif
2179 ;
2180 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2181 pReNative->Core.bmGstSimdRegShadows = 0;
2182 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2183 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2184
2185 /* Full host register reinit: */
2186 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2187 {
2188 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2189 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2190 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2191 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2192 }
2193
2194 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2195 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2196 {
2197 fRegs &= ~RT_BIT_32(idxReg);
2198 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2199 }
2200
2201#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2202 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2203#endif
2204
2205#endif
2206
2207 return pReNative;
2208}
2209
2210
2211/**
2212 * Used when done emitting the per-chunk code and for iemNativeInit bailout.
2213 */
2214static void iemNativeTerm(PIEMRECOMPILERSTATE pReNative)
2215{
2216 RTMemFree(pReNative->pInstrBuf);
2217 RTMemFree(pReNative->paLabels);
2218 RTMemFree(pReNative->paFixups);
2219#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2220 RTMemFree(pReNative->paTbExitFixups);
2221#endif
2222#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2223 RTMemFree(pReNative->pDbgInfo);
2224#endif
2225 RTMemFree(pReNative);
2226}
2227
2228
2229/**
2230 * Allocates and initializes the native recompiler state.
2231 *
2232 * This is called the first time an EMT wants to recompile something.
2233 *
2234 * @returns Pointer to the new recompiler state.
2235 * @param pVCpu The cross context virtual CPU structure of the calling
2236 * thread.
2237 * @param pTb The TB that's about to be recompiled. When this is NULL,
2238 * the recompiler state is for emitting the common per-chunk
2239 * code from iemNativeRecompileAttachExecMemChunkCtx.
2240 * @thread EMT(pVCpu)
2241 */
2242static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2243{
2244 VMCPU_ASSERT_EMT(pVCpu);
2245
2246 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2247 AssertReturn(pReNative, NULL);
2248
2249 /*
2250 * Try allocate all the buffers and stuff we need.
2251 */
2252 uint32_t const cFactor = pTb ? 1 : 32 /* per-chunk stuff doesn't really need anything but the code buffer */;
2253 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2254 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K / cFactor);
2255 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K / cFactor);
2256#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2257 pReNative->paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemAllocZ(sizeof(IEMNATIVEEXITFIXUP) * _8K / cFactor);
2258#endif
2259#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2260 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K / cFactor]));
2261#endif
2262 if (RT_LIKELY( pReNative->pInstrBuf
2263 && pReNative->paLabels
2264 && pReNative->paFixups)
2265#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2266 && pReNative->paTbExitFixups
2267#endif
2268#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2269 && pReNative->pDbgInfo
2270#endif
2271 )
2272 {
2273 /*
2274 * Set the buffer & array sizes on success.
2275 */
2276 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2277 pReNative->cLabelsAlloc = _8K / cFactor;
2278 pReNative->cFixupsAlloc = _16K / cFactor;
2279#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2280 pReNative->cTbExitFixupsAlloc = _8K / cFactor;
2281#endif
2282#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2283 pReNative->cDbgInfoAlloc = _16K / cFactor;
2284#endif
2285
2286 /* Other constant stuff: */
2287 pReNative->pVCpu = pVCpu;
2288
2289 /*
2290 * Done, just reinit it.
2291 */
2292 return iemNativeReInit(pReNative, pTb);
2293 }
2294
2295 /*
2296 * Failed. Cleanup and return.
2297 */
2298 AssertFailed();
2299 iemNativeTerm(pReNative);
2300 return NULL;
2301}
2302
2303
2304/**
2305 * Creates a label.
2306 *
2307 * If the label does not yet have a defined position,
2308 * call iemNativeLabelDefine() later to set it.
2309 *
2310 * @returns Label ID. Throws VBox status code on failure, so no need to check
2311 * the return value.
2312 * @param pReNative The native recompile state.
2313 * @param enmType The label type.
2314 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2315 * label is not yet defined (default).
2316 * @param uData Data associated with the label. Only applicable to
2317 * certain types of labels. Default is zero.
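 *
 * @note    Rough usage sketch, for illustration only; the fixup type and the
 *          offBranch bookkeeping are placeholders and depend on the actual
 *          branch encoding being emitted:
 * @code
 *              // Forward declare the label; its position is not known yet.
 *              uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmType);
 *              // ... emit the branch instruction and remember where to patch it ...
 *              iemNativeAddFixup(pReNative, offBranch, idxLabel, kIemNativeFixupType_Rel32);
 *              // ... later, once the target position is known:
 *              iemNativeLabelDefine(pReNative, idxLabel, off);
 * @endcode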
2318 */
2319DECL_HIDDEN_THROW(uint32_t)
2320iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2321 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2322{
2323 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2324#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
2325 Assert(enmType >= kIemNativeLabelType_LoopJumpTarget);
2326#endif
2327
2328 /*
2329 * Locate existing label definition.
2330 *
2331 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2332 * and uData is zero.
2333 */
2334 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2335 uint32_t const cLabels = pReNative->cLabels;
2336 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2337#ifndef VBOX_STRICT
2338 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2339 && offWhere == UINT32_MAX
2340 && uData == 0
2341#endif
2342 )
2343 {
2344#ifndef VBOX_STRICT
2345 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2346 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2347 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2348 if (idxLabel < pReNative->cLabels)
2349 return idxLabel;
2350#else
2351 for (uint32_t i = 0; i < cLabels; i++)
2352 if ( paLabels[i].enmType == enmType
2353 && paLabels[i].uData == uData)
2354 {
2355 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2356 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2357 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2358 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2359 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2360 return i;
2361 }
2362 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2363 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2364#endif
2365 }
2366
2367 /*
2368 * Make sure we've got room for another label.
2369 */
2370 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2371 { /* likely */ }
2372 else
2373 {
2374 uint32_t cNew = pReNative->cLabelsAlloc;
2375 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2376 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2377 cNew *= 2;
2378 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
2379 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2380 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2381 pReNative->paLabels = paLabels;
2382 pReNative->cLabelsAlloc = cNew;
2383 }
2384
2385 /*
2386 * Define a new label.
2387 */
2388 paLabels[cLabels].off = offWhere;
2389 paLabels[cLabels].enmType = enmType;
2390 paLabels[cLabels].uData = uData;
2391 pReNative->cLabels = cLabels + 1;
2392
2393 Assert((unsigned)enmType < 64);
2394 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2395
2396 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2397 {
2398 Assert(uData == 0);
2399 pReNative->aidxUniqueLabels[enmType] = cLabels;
2400 }
2401
2402 if (offWhere != UINT32_MAX)
2403 {
2404#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2405 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2406 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2407#endif
2408 }
2409 return cLabels;
2410}
2411
2412
2413/**
2414 * Defines the location of an existing label.
2415 *
2416 * @param pReNative The native recompile state.
2417 * @param idxLabel The label to define.
2418 * @param offWhere The position.
2419 */
2420DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2421{
2422 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2423 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2424 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2425 pLabel->off = offWhere;
2426#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2427 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2428 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2429#endif
2430}
2431
2432
2433/**
2434 * Looks up a label.
2435 *
2436 * @returns Label ID if found, UINT32_MAX if not.
2437 */
2438DECLHIDDEN(uint32_t) iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2439 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
2440{
2441 Assert((unsigned)enmType < 64);
2442 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2443 {
2444 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2445 return pReNative->aidxUniqueLabels[enmType];
2446
2447 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2448 uint32_t const cLabels = pReNative->cLabels;
2449 for (uint32_t i = 0; i < cLabels; i++)
2450 if ( paLabels[i].enmType == enmType
2451 && paLabels[i].uData == uData
2452 && ( paLabels[i].off == offWhere
2453 || offWhere == UINT32_MAX
2454 || paLabels[i].off == UINT32_MAX))
2455 return i;
2456 }
2457 return UINT32_MAX;
2458}
2459
2460
2461/**
2462 * Adds a fixup.
2463 *
2464 * @throws VBox status code (int) on failure.
2465 * @param pReNative The native recompile state.
2466 * @param offWhere The instruction offset of the fixup location.
2467 * @param idxLabel The target label ID for the fixup.
2468 * @param enmType The fixup type.
2469 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2470 */
2471DECL_HIDDEN_THROW(void)
2472iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2473 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2474{
2475 Assert(idxLabel <= UINT16_MAX);
2476 Assert((unsigned)enmType <= UINT8_MAX);
2477#ifdef RT_ARCH_ARM64
2478 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2479 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2480 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2481#endif
2482
2483 /*
2484 * Make sure we've room.
2485 */
2486 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2487 uint32_t const cFixups = pReNative->cFixups;
2488 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2489 { /* likely */ }
2490 else
2491 {
2492 uint32_t cNew = pReNative->cFixupsAlloc;
2493 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2494 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2495 cNew *= 2;
2496 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2497 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2498 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2499 pReNative->paFixups = paFixups;
2500 pReNative->cFixupsAlloc = cNew;
2501 }
2502
2503 /*
2504 * Add the fixup.
2505 */
2506 paFixups[cFixups].off = offWhere;
2507 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2508 paFixups[cFixups].enmType = enmType;
2509 paFixups[cFixups].offAddend = offAddend;
2510 pReNative->cFixups = cFixups + 1;
2511}
2512
2513
2514#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2515/**
2516 * Adds a fixup to the per-chunk tail code.
2517 *
2518 * @throws VBox status code (int) on failure.
2519 * @param pReNative The native recompile state.
2520 * @param offWhere The instruction offset of the fixup location.
2521 * @param enmExitReason The exit reason to jump to.
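 *
 * @note    Unlike iemNativeAddFixup(), the target here is not a TB-local label
 *          but the common per-chunk tail code (exit stubs), so the fixup is
 *          keyed by exit reason rather than by label index.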
2522 */
2523DECL_HIDDEN_THROW(void)
2524iemNativeAddTbExitFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, IEMNATIVELABELTYPE enmExitReason)
2525{
2526 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
2527
2528 /*
2529 * Make sure we've room.
2530 */
2531 PIEMNATIVEEXITFIXUP paTbExitFixups = pReNative->paTbExitFixups;
2532 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
2533 if (RT_LIKELY(cTbExitFixups < pReNative->cTbExitFixupsAlloc))
2534 { /* likely */ }
2535 else
2536 {
2537 uint32_t cNew = pReNative->cTbExitFixupsAlloc;
2538 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2539 AssertStmt(cTbExitFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2540 cNew *= 2;
2541 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2542 paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemRealloc(paTbExitFixups, cNew * sizeof(paTbExitFixups[0]));
2543 AssertStmt(paTbExitFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2544 pReNative->paTbExitFixups = paTbExitFixups;
2545 pReNative->cTbExitFixupsAlloc = cNew;
2546 }
2547
2548 /*
2549 * Add the fixup.
2550 */
2551 paTbExitFixups[cTbExitFixups].off = offWhere;
2552 paTbExitFixups[cTbExitFixups].enmExitReason = enmExitReason;
2553 pReNative->cTbExitFixups = cTbExitFixups + 1;
2554}
2555#endif
2556
2557
2558/**
2559 * Slow code path for iemNativeInstrBufEnsure.
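 *
 * Doubles the instruction buffer until at least @a off + @a cInstrReq
 * instructions fit, subject to an architecture specific upper limit, and
 * throws/longjmps on allocation failure.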
2560 */
2561DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2562{
2563 /* Double the buffer size till we meet the request. */
2564 uint32_t cNew = pReNative->cInstrBufAlloc;
2565 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2566 do
2567 cNew *= 2;
2568 while (cNew < off + cInstrReq);
2569
2570 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2571#ifdef RT_ARCH_ARM64
2572 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2573#else
2574 uint32_t const cbMaxInstrBuf = _2M;
2575#endif
2576 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2577
2578 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2579 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2580
2581#ifdef VBOX_STRICT
2582 pReNative->offInstrBufChecked = off + cInstrReq;
2583#endif
2584 pReNative->cInstrBufAlloc = cNew;
2585 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2586}
2587
2588#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2589
2590/**
2591 * Grows the debug info array used during recompilation.
2592 *
2593 * @returns Pointer to the new debug info block; throws VBox status code on
2594 * failure, so no need to check the return value.
2595 */
2596DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2597{
2598 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2599 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2600 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2601 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2602 pReNative->pDbgInfo = pDbgInfo;
2603 pReNative->cDbgInfoAlloc = cNew;
2604 return pDbgInfo;
2605}
2606
2607
2608/**
2609 * Adds a new debug info uninitialized entry, returning the pointer to it.
2610 */
2611DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2612{
2613 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2614 { /* likely */ }
2615 else
2616 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2617 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2618}
2619
2620
2621/**
2622 * Debug Info: Adds a native offset record, if necessary.
2623 */
2624DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2625{
2626 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2627
2628 /*
2629 * Do we need this one?
2630 */
2631 uint32_t const offPrev = pDbgInfo->offNativeLast;
2632 if (offPrev == off)
2633 return;
2634 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2635
2636 /*
2637 * Add it.
2638 */
2639 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2640 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2641 pEntry->NativeOffset.offNative = off;
2642 pDbgInfo->offNativeLast = off;
2643}
2644
2645
2646/**
2647 * Debug Info: Record info about a label.
2648 */
2649static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2650{
2651 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2652 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2653 pEntry->Label.uUnused = 0;
2654 pEntry->Label.enmLabel = (uint8_t)enmType;
2655 pEntry->Label.uData = uData;
2656}
2657
2658
2659/**
2660 * Debug Info: Record info about a threaded call.
2661 */
2662static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2663{
2664 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2665 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2666 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2667 pEntry->ThreadedCall.uUnused = 0;
2668 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2669}
2670
2671
2672/**
2673 * Debug Info: Record info about a new guest instruction.
2674 */
2675static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2676{
2677 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2678 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2679 pEntry->GuestInstruction.uUnused = 0;
2680 pEntry->GuestInstruction.fExec = fExec;
2681}
2682
2683
2684/**
2685 * Debug Info: Record info about guest register shadowing.
2686 */
2687DECL_HIDDEN_THROW(void)
2688iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2689 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2690{
2691 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2692 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2693 pEntry->GuestRegShadowing.uUnused = 0;
2694 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2695 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2696 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2697#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2698 Assert( idxHstReg != UINT8_MAX
2699 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2700#endif
2701}
2702
2703
2704# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2705/**
2706 * Debug Info: Record info about guest SIMD register shadowing.
2707 */
2708DECL_HIDDEN_THROW(void)
2709iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2710 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2711{
2712 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2713 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2714 pEntry->GuestSimdRegShadowing.uUnused = 0;
2715 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2716 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2717 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2718}
2719# endif
2720
2721
2722# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2723/**
2724 * Debug Info: Record info about delayed RIP updates.
2725 */
2726DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint64_t offPc, uint32_t cInstrSkipped)
2727{
2728 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2729 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2730 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2731 pEntry->DelayedPcUpdate.offPc = offPc; /** @todo support larger values */
2732}
2733# endif
2734
2735# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2736
2737/**
2738 * Debug Info: Record info about a dirty guest register.
2739 */
2740DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2741 uint8_t idxGstReg, uint8_t idxHstReg)
2742{
2743 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2744 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2745 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2746 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2747 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2748}
2749
2750
2751/**
2752 * Debug Info: Record info about a dirty guest register writeback operation.
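 *
 * The 64-bit register mask is recorded as up to three entries of 25+25+14 bits;
 * GuestRegWriteback.cShift holds the chunk index, so an entry covers the guest
 * registers starting at bit cShift * 25 of the original mask.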
2753 */
2754DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2755{
2756 unsigned const cBitsGstRegMask = 25;
2757 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2758
2759 /* The first block of 25 bits: */
2760 if (fGstReg & fGstRegMask)
2761 {
2762 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2763 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2764 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2765 pEntry->GuestRegWriteback.cShift = 0;
2766 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2767 fGstReg &= ~(uint64_t)fGstRegMask;
2768 if (!fGstReg)
2769 return;
2770 }
2771
2772 /* The second block of 25 bits: */
2773 fGstReg >>= cBitsGstRegMask;
2774 if (fGstReg & fGstRegMask)
2775 {
2776 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2777 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2778 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2779 pEntry->GuestRegWriteback.cShift = 1;
2780 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2781 fGstReg &= ~(uint64_t)fGstRegMask;
2782 if (!fGstReg)
2783 return;
2784 }
2785
2786 /* The last block with 14 bits: */
2787 fGstReg >>= cBitsGstRegMask;
2788 Assert(fGstReg & fGstRegMask);
2789 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2790 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2791 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2792 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2793 pEntry->GuestRegWriteback.cShift = 2;
2794 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2795}
2796
2797# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2798
2799#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2800
2801
2802/*********************************************************************************************************************************
2803* Register Allocator *
2804*********************************************************************************************************************************/
2805
2806/**
2807 * Register parameter indexes (indexed by argument number).
2808 */
2809DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2810{
2811 IEMNATIVE_CALL_ARG0_GREG,
2812 IEMNATIVE_CALL_ARG1_GREG,
2813 IEMNATIVE_CALL_ARG2_GREG,
2814 IEMNATIVE_CALL_ARG3_GREG,
2815#if defined(IEMNATIVE_CALL_ARG4_GREG)
2816 IEMNATIVE_CALL_ARG4_GREG,
2817# if defined(IEMNATIVE_CALL_ARG5_GREG)
2818 IEMNATIVE_CALL_ARG5_GREG,
2819# if defined(IEMNATIVE_CALL_ARG6_GREG)
2820 IEMNATIVE_CALL_ARG6_GREG,
2821# if defined(IEMNATIVE_CALL_ARG7_GREG)
2822 IEMNATIVE_CALL_ARG7_GREG,
2823# endif
2824# endif
2825# endif
2826#endif
2827};
2828AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2829
2830/**
2831 * Call register masks indexed by argument count.
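 *
 * E.g. g_afIemNativeCallRegs[2] is the mask covering the host registers used
 * for the first two call arguments (ARG0 and ARG1).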
2832 */
2833DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2834{
2835 0,
2836 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2837 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2838 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2839 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2840 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2841#if defined(IEMNATIVE_CALL_ARG4_GREG)
2842 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2843 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2844# if defined(IEMNATIVE_CALL_ARG5_GREG)
2845 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2846 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2847# if defined(IEMNATIVE_CALL_ARG6_GREG)
2848 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2849 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2850 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2851# if defined(IEMNATIVE_CALL_ARG7_GREG)
2852 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2853 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2854 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2855# endif
2856# endif
2857# endif
2858#endif
2859};
2860
2861#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2862/**
2863 * BP offset of the stack argument slots.
2864 *
2865 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2866 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2867 */
2868DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2869{
2870 IEMNATIVE_FP_OFF_STACK_ARG0,
2871# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2872 IEMNATIVE_FP_OFF_STACK_ARG1,
2873# endif
2874# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2875 IEMNATIVE_FP_OFF_STACK_ARG2,
2876# endif
2877# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2878 IEMNATIVE_FP_OFF_STACK_ARG3,
2879# endif
2880};
2881AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2882#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2883
2884/**
2885 * Info about shadowed guest register values.
2886 * @see IEMNATIVEGSTREG
2887 */
2888DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2889{
2890#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2891 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2892 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2893 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2894 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2895 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2896 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2897 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2898 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2899 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2900 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2901 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2902 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2903 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2904 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2905 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2906 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2907 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2908 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2909 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2910 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2911 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2912 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2913 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2914 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2915 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2916 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2917 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2918 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2919 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2920 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2921 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2922 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2923 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2924 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2925 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2926 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2927 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2928 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2929 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2930 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2931 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2932 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2933 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2934 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2935 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2936 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2937 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
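    /* Note: The individual status flag entries below have no CPUMCTX field of
             their own (offset UINT32_MAX, zero size) and thus cannot be loaded
             or stored via this table; see the cb != 0 assertion in
             iemNativeEmitStoreGprWithGstShadowReg(). */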
2938 /* [kIemNativeGstReg_EFlags.Cf] = */ { UINT32_MAX, 0, "efl.cf", },
2939 /* [kIemNativeGstReg_EFlags.Of] = */ { UINT32_MAX, 0, "efl.of", },
2940 /* [kIemNativeGstReg_EFlags.Af] = */ { UINT32_MAX, 0, "efl.af", },
2941 /* [kIemNativeGstReg_EFlags.Zf] = */ { UINT32_MAX, 0, "efl.zf", },
2942 /* [kIemNativeGstReg_EFlags.Sf] = */ { UINT32_MAX, 0, "efl.sf", },
2943 /* [kIemNativeGstReg_EFlags.Pf] = */ { UINT32_MAX, 0, "efl.pf", },
2944 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2945#undef CPUMCTX_OFF_AND_SIZE
2946};
2947AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2948
2949
2950/** Host CPU general purpose register names. */
2951DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2952{
2953#ifdef RT_ARCH_AMD64
2954 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2955#elif defined(RT_ARCH_ARM64)
2956 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2957 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2958#else
2959# error "port me"
2960#endif
2961};
2962
2963
2964#if 0 /* unused */
2965/**
2966 * Tries to locate a suitable register in the given register mask.
2967 *
2968 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2969 * failed.
2970 *
2971 * @returns Host register number on success, returns UINT8_MAX on failure.
2972 */
2973static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2974{
2975 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2976 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2977 if (fRegs)
2978 {
2979 /** @todo pick better here: */
2980 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2981
2982 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2983 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2984 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2985 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2986
2987 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2988 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2989 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2990 return idxReg;
2991 }
2992 return UINT8_MAX;
2993}
2994#endif /* unused */
2995
2996#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2997
2998/**
2999 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
3000 *
3001 * @returns New code buffer offset on success, UINT32_MAX on failure.
3002 * @param pReNative The native recompile state.
3003 * @param off The current code buffer position.
3004 * @param enmGstReg The guest register to store to.
3005 * @param idxHstReg The host register to store from.
3006 */
3007DECL_FORCE_INLINE_THROW(uint32_t)
3008iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
3009{
3010 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3011 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3012
3013 switch (g_aGstShadowInfo[enmGstReg].cb)
3014 {
3015 case sizeof(uint64_t):
3016 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3017 case sizeof(uint32_t):
3018 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3019 case sizeof(uint16_t):
3020 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3021# if 0 /* not present in the table. */
3022 case sizeof(uint8_t):
3023 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3024# endif
3025 default:
3026 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3027 }
3028}
3029
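/*
 * Illustrative usage sketch: the cb field of the g_aGstShadowInfo entry picks
 * the store width, e.g. flushing the PC shadow takes the 64-bit path because
 * CPUMCTX's rip field is 8 bytes.  idxPcReg below is a hypothetical host
 * register holding the shadowed value; the line is not from an actual caller.
 */
# if 0 /* illustration only */
off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, kIemNativeGstReg_Pc, idxPcReg);
# endif
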
3030
3031/**
3032 * Emits code to flush a pending write of the given guest register,
3033 * version with alternative core state.
3034 *
3035 * @returns New code buffer offset.
3036 * @param pReNative The native recompile state.
3037 * @param off Current code buffer position.
3038 * @param pCore Alternative core state.
3039 * @param enmGstReg The guest register to flush.
3040 */
3041DECL_HIDDEN_THROW(uint32_t)
3042iemNativeRegFlushPendingWriteEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVECORESTATE pCore, IEMNATIVEGSTREG enmGstReg)
3043{
3044 uint8_t const idxHstReg = pCore->aidxGstRegShadows[enmGstReg];
3045
3046 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3047 && enmGstReg <= kIemNativeGstReg_GprLast)
3048 || enmGstReg == kIemNativeGstReg_MxCsr);
3049 Assert( idxHstReg != UINT8_MAX
3050 && pCore->bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3051 Log12(("iemNativeRegFlushPendingWriteEx: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3052 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3053
3054 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3055
3056 pCore->bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3057 return off;
3058}
3059
3060
3061/**
3062 * Emits code to flush a pending write of the given guest register.
3063 *
3064 * @returns New code buffer offset.
3065 * @param pReNative The native recompile state.
3066 * @param off Current code buffer position.
3067 * @param enmGstReg The guest register to flush.
3068 */
3069DECL_HIDDEN_THROW(uint32_t)
3070iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
3071{
3072 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3073
3074 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3075 && enmGstReg <= kIemNativeGstReg_GprLast)
3076 || enmGstReg == kIemNativeGstReg_MxCsr);
3077 Assert( idxHstReg != UINT8_MAX
3078 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3079 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3080 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3081
3082 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3083
3084 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3085 return off;
3086}
3087
3088
3089/**
3090 * Flush the given set of guest registers if marked as dirty.
3091 *
3092 * @returns New code buffer offset.
3093 * @param pReNative The native recompile state.
3094 * @param off Current code buffer position.
3095 * @param fFlushGstReg The guest register set to flush (default is flush everything).
3096 */
3097DECL_HIDDEN_THROW(uint32_t)
3098iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
3099{
3100 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
3101 if (bmGstRegShadowDirty)
3102 {
3103# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3104 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3105 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
3106# endif
3107 do
3108 {
3109 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3110 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3111 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3112 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3113 } while (bmGstRegShadowDirty);
3114 }
3115
3116 return off;
3117}
3118
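/*
 * Illustrative usage sketch: a call site would typically flush pending shadow
 * writes before emitting code that accesses CPUMCTX directly, either for
 * everything or for a subset selected via the mask.  Hypothetical lines, not
 * taken from an actual caller.
 */
# if 0 /* illustration only */
off = iemNativeRegFlushDirtyGuest(pReNative, off);                        /* flush every dirty shadow, or ... */
off = iemNativeRegFlushDirtyGuest(pReNative, off,
                                  RT_BIT_64(kIemNativeGstReg_GprFirst));  /* ... restrict it to a subset */
# endif
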
3119
3120/**
3121 * Flush all shadowed guest registers marked as dirty for the given host register.
3122 *
3123 * @returns New code buffer offset.
3124 * @param pReNative The native recompile state.
3125 * @param off Current code buffer position.
3126 * @param idxHstReg The host register.
3127 *
3128 * @note This doesn't do any unshadowing of guest registers from the host register.
3129 */
3130DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3131{
3132 /* We need to flush any pending guest register writes this host register shadows. */
3133 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3134 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3135 {
3136# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3137 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3138 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3139# endif
3140 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3141 do
3142 {
3143 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3144 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3145 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3146 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3147 } while (bmGstRegShadowDirty);
3148 }
3149
3150 return off;
3151}
3152
3153#endif /* IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK */
3154
3155
3156/**
3157 * Locate a register, possibly freeing one up.
3158 *
3159 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3160 * failed.
3161 *
3162 * @returns Host register number on success. Returns UINT8_MAX if no registers
3163 * are found; the caller is supposed to deal with this and raise an
3164 * allocation type specific status code (if desired).
3165 *
3166 * @throws VBox status code if we run into trouble spilling a variable or
3167 * recording debug info. Does NOT throw anything if we're out of
3168 * registers, though.
3169 */
3170static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3171 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3172{
3173 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3174 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3175 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3176
3177 /*
3178 * Try a freed register that's shadowing a guest register.
3179 */
3180 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3181 if (fRegs)
3182 {
3183 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3184
3185#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3186 /*
3187 * When we have liveness information, we use it to kick out all shadowed
3188 * guest registers that will not be needed any more in this TB. If we're
3189 * lucky, this may prevent us from ending up here again.
3190 *
3191 * Note! We must consider the previous entry here so we don't free
3192 * anything that the current threaded function requires (current
3193 * entry is produced by the next threaded function).
3194 */
3195 uint32_t const idxCurCall = pReNative->idxCurCall;
3196 if (idxCurCall > 0)
3197 {
3198 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3199 uint64_t fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
3200
3201 /* Merge EFLAGS. */
3202 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3203 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3204 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3205 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3206 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
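 /* Net effect: the kIemNativeGstReg_EFlags bit survives the three AND+shift
 steps only if all seven EFLAGS liveness slots (Other, CF, PF, AF, ZF, SF,
 OF) can be freed, so a shadow still needed by any individual flag is kept. */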
3207
3208 /* If it matches any shadowed registers. */
3209 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3210 {
3211#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3212 /* Writeback any dirty shadow registers we are about to unshadow. */
3213 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3214#endif
3215
3216 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3217 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3218 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3219
3220 /* See if we've got any unshadowed registers we can return now. */
3221 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3222 if (fUnshadowedRegs)
3223 {
3224 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3225 return (fPreferVolatile
3226 ? ASMBitFirstSetU32(fUnshadowedRegs)
3227 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3228 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3229 - 1;
3230 }
3231 }
3232 }
3233#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3234
3235 unsigned const idxReg = (fPreferVolatile
3236 ? ASMBitFirstSetU32(fRegs)
3237 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3238 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3239 - 1;
3240
3241 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3242 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3243 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3244 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3245
3246#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3247 /* We need to flush any pending guest register writes this host register shadows. */
3248 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3249#endif
3250
3251 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3252 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3253 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3254 return idxReg;
3255 }
3256
3257 /*
3258 * Try free up a variable that's in a register.
3259 *
3260 * We do two rounds here, first evacuating variables that don't need to be
3261 * saved on the stack, then in the second round moving things to the stack.
3262 */
3263 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3264 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3265 {
3266 uint32_t fVars = pReNative->Core.bmVars;
3267 while (fVars)
3268 {
3269 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3270 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3271#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3272 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
3273 { fVars &= ~RT_BIT_32(idxVar); continue; } /* clear the bit first so the loop advances past this variable */
3274#endif
3275
3276 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3277 && (RT_BIT_32(idxReg) & fRegMask)
3278 && ( iLoop == 0
3279 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3280 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3281 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3282 {
3283 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3284 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3285 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3286 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3287 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3288 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3289#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3290 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3291#endif
3292
3293 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3294 {
3295 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3296 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3297 }
3298
3299 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3300 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3301
3302 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3303 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3304 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3305 return idxReg;
3306 }
3307 fVars &= ~RT_BIT_32(idxVar);
3308 }
3309 }
3310
3311 return UINT8_MAX;
3312}
3313
3314
3315/**
3316 * Reassigns a variable to a different register specified by the caller.
3317 *
3318 * @returns The new code buffer position.
3319 * @param pReNative The native recompile state.
3320 * @param off The current code buffer position.
3321 * @param idxVar The variable index.
3322 * @param idxRegOld The old host register number.
3323 * @param idxRegNew The new host register number.
3324 * @param pszCaller The caller for logging.
3325 */
3326static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3327 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3328{
3329 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3330 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3331#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3332 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3333#endif
3334 RT_NOREF(pszCaller);
3335
3336#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3337 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3338#endif
3339 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3340
3341 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3342#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3343 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3344#endif
3345 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3346 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3347 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3348
3349 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3350 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3351 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3352 if (fGstRegShadows)
3353 {
3354 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3355 | RT_BIT_32(idxRegNew);
3356 while (fGstRegShadows)
3357 {
3358 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3359 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3360
3361 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3362 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3363 }
3364 }
3365
3366 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3367 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3368 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3369 return off;
3370}
3371
3372
3373/**
3374 * Moves a variable to a different register or spills it onto the stack.
3375 *
3376 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3377 * kinds can easily be recreated if needed later.
3378 *
3379 * @returns The new code buffer position.
3380 * @param pReNative The native recompile state.
3381 * @param off The current code buffer position.
3382 * @param idxVar The variable index.
3383 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3384 * call-volatile registers.
3385 */
3386DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3387 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3388{
3389 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3390 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3391 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3392 Assert(!pVar->fRegAcquired);
3393
3394 uint8_t const idxRegOld = pVar->idxReg;
3395 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3396 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3397 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3398 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3399 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3400 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3401 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3402 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3403#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3404 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3405#endif
3406
3407
3408 /** @todo Add statistics on this.*/
3409 /** @todo Implement basic variable liveness analysis (python) so variables
3410 * can be freed immediately once no longer used. This has the potential to
3411 * be trashing registers and stack for dead variables.
3412 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3413
3414 /*
3415 * First try move it to a different register, as that's cheaper.
3416 */
3417 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3418 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3419 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3420 if (fRegs)
3421 {
3422 /* Avoid using shadow registers, if possible. */
3423 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3424 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3425 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3426 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3427 }
3428
3429 /*
3430 * Otherwise we must spill the register onto the stack.
3431 */
3432 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3433 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3434 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3435 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3436
3437 pVar->idxReg = UINT8_MAX;
3438 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3439 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3440 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3441 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3442 return off;
3443}
3444
3445
3446/**
3447 * Allocates a temporary host general purpose register.
3448 *
3449 * This may emit code to save register content onto the stack in order to free
3450 * up a register.
3451 *
3452 * @returns The host register number; throws VBox status code on failure,
3453 * so no need to check the return value.
3454 * @param pReNative The native recompile state.
3455 * @param poff Pointer to the variable with the code buffer position.
3456 * This will be update if we need to move a variable from
3457 * register to stack in order to satisfy the request.
3458 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3459 * registers (@c true, default) or the other way around
3460 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3461 */
3462DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3463{
3464 /*
3465 * Try find a completely unused register, preferably a call-volatile one.
3466 */
3467 uint8_t idxReg;
3468 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3469 & ~pReNative->Core.bmHstRegsWithGstShadow
3470 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3471 if (fRegs)
3472 {
3473 if (fPreferVolatile)
3474 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3475 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3476 else
3477 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3478 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3479 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3480 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3481 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3482 }
3483 else
3484 {
3485 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3486 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3487 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3488 }
3489 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3490}
3491
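/*
 * Minimal usage sketch: grab a scratch GPR, emit something with it and free it
 * again so later allocations can reuse the register.  The immediate value is
 * arbitrary; the snippet is hypothetical and not from an actual caller.
 */
#if 0 /* illustration only */
uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x42));
/* ... emit code using idxTmpReg ... */
iemNativeRegFreeTmp(pReNative, idxTmpReg);
#endif
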
3492
3493/**
3494 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3495 * registers.
3496 *
3497 * @returns The host register number; throws VBox status code on failure,
3498 * so no need to check the return value.
3499 * @param pReNative The native recompile state.
3500 * @param poff Pointer to the variable with the code buffer position.
3501 * This will be updated if we need to move a variable from
3502 * register to stack in order to satisfy the request.
3503 * @param fRegMask Mask of acceptable registers.
3504 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3505 * registers (@c true, default) or the other way around
3506 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3507 */
3508DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3509 bool fPreferVolatile /*= true*/)
3510{
3511 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3512 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3513
3514 /*
3515 * Try find a completely unused register, preferably a call-volatile one.
3516 */
3517 uint8_t idxReg;
3518 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3519 & ~pReNative->Core.bmHstRegsWithGstShadow
3520 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3521 & fRegMask;
3522 if (fRegs)
3523 {
3524 if (fPreferVolatile)
3525 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3526 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3527 else
3528 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3529 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3530 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3531 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3532 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3533 }
3534 else
3535 {
3536 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3537 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3538 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3539 }
3540 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3541}
3542
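/*
 * Usage sketch: restricting the allocation to call-saved registers, mirroring
 * the mask construction used by iemNativeRegAllocTmpForGuestRegCommon below.
 * Hypothetical snippet, not from an actual caller.
 */
#if 0 /* illustration only */
uint8_t const idxSafeReg = iemNativeRegAllocTmpEx(pReNative, &off,
                                                  IEMNATIVE_HST_GREG_MASK
                                                  & ~IEMNATIVE_REG_FIXED_MASK
                                                  & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
                                                  false /*fPreferVolatile*/);
/* ... the value in idxSafeReg survives an emitted helper call ... */
iemNativeRegFreeTmp(pReNative, idxSafeReg);
#endif
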
3543
3544/**
3545 * Allocates a temporary register for loading an immediate value into.
3546 *
3547 * This will emit code to load the immediate, unless there happens to be an
3548 * unused register with the value already loaded.
3549 *
3550 * The caller will not modify the returned register, it must be considered
3551 * read-only. Free using iemNativeRegFreeTmpImm.
3552 *
3553 * @returns The host register number; throws VBox status code on failure, so no
3554 * need to check the return value.
3555 * @param pReNative The native recompile state.
3556 * @param poff Pointer to the variable with the code buffer position.
3557 * @param uImm The immediate value that the register must hold upon
3558 * return.
3559 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3560 * registers (@c true, default) or the other way around
3561 * (@c false).
3562 *
3563 * @note Reusing immediate values has not been implemented yet.
3564 */
3565DECL_HIDDEN_THROW(uint8_t)
3566iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3567{
3568 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3569 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3570 return idxReg;
3571}
3572
3573
3574/**
3575 * Common worker for iemNativeRegAllocTmpForGuestReg() and
3576 * iemNativeRegAllocTmpForGuestEFlags().
3577 *
3578 * See iemNativeRegAllocTmpForGuestReg() for details.
3579 */
3580static uint8_t
3581iemNativeRegAllocTmpForGuestRegCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3582 IEMNATIVEGSTREGUSE enmIntendedUse, bool fNoVolatileRegs)
3583{
3584 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3585#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3586 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3587#endif
3588 uint32_t const fRegMask = !fNoVolatileRegs
3589 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3590 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3591
3592 /*
3593 * First check if the guest register value is already in a host register.
3594 */
3595 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3596 {
3597 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3598 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3599 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3600 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3601
3602 /* It's not supposed to be allocated... */
3603 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3604 {
3605 /*
3606 * If the register will trash the guest shadow copy, try find a
3607 * completely unused register we can use instead. If that fails,
3608 * we need to disassociate the host reg from the guest reg.
3609 */
3610 /** @todo would be nice to know if preserving the register is in any way helpful. */
3611 /* If the purpose is calculations, try duplicate the register value as
3612 we'll be clobbering the shadow. */
3613 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3614 && ( ~pReNative->Core.bmHstRegs
3615 & ~pReNative->Core.bmHstRegsWithGstShadow
3616 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3617 {
3618 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3619
3620 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3621
3622 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3623 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3624 g_apszIemNativeHstRegNames[idxRegNew]));
3625 idxReg = idxRegNew;
3626 }
3627 /* If the current register matches the restrictions, go ahead and allocate
3628 it for the caller. */
3629 else if (fRegMask & RT_BIT_32(idxReg))
3630 {
3631 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3632 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3633 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3634 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3635 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3636 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3637 else
3638 {
3639 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3640 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3641 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3642 }
3643 }
3644 /* Otherwise, allocate a register that satisfies the caller and transfer
3645 the shadowing if compatible with the intended use. (This basically
3646 means the caller wants a non-volatile register (RSP push/pop scenario).) */
3647 else
3648 {
3649 Assert(fNoVolatileRegs);
3650 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3651 !fNoVolatileRegs
3652 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3653 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3654 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3655 {
3656 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3657 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3658 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3659 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3660 }
3661 else
3662 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3663 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3664 g_apszIemNativeHstRegNames[idxRegNew]));
3665 idxReg = idxRegNew;
3666 }
3667 }
3668 else
3669 {
3670 /*
3671 * Oops. Shadowed guest register already allocated!
3672 *
3673 * Allocate a new register, copy the value and, if updating, the
3674 * guest shadow copy assignment to the new register.
3675 */
3676 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3677 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3678 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3679 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3680
3681 /** @todo share register for readonly access. */
3682 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3683 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3684
3685 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3686 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3687
3688 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3689 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3690 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3691 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3692 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3693 else
3694 {
3695 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3696 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3697 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3698 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3699 }
3700 idxReg = idxRegNew;
3701 }
3702 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3703
3704#ifdef VBOX_STRICT
3705 /* Strict builds: Check that the value is correct. */
3706 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3707#endif
3708
3709#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3710 /** @todo r=aeichner Implement for registers other than GPR as well. */
3711 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3712 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3713 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3714 && enmGstReg <= kIemNativeGstReg_GprLast)
3715 || enmGstReg == kIemNativeGstReg_MxCsr))
3716 {
3717# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3718 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3719 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3720# endif
3721 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3722 }
3723#endif
3724
3725 return idxReg;
3726 }
3727
3728 /*
3729 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3730 */
3731 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3732
3733 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3734 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3735
3736 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3737 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3738 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3739 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3740
3741#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3742 /** @todo r=aeichner Implement for registers other than GPR as well. */
3743 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3744 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3745 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3746 && enmGstReg <= kIemNativeGstReg_GprLast)
3747 || enmGstReg == kIemNativeGstReg_MxCsr))
3748 {
3749# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3750 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3751 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3752# endif
3753 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3754 }
3755#endif
3756
3757 return idxRegNew;
3758}
3759
3760
3761/**
3762 * Allocates a temporary host general purpose register for keeping a guest
3763 * register value.
3764 *
3765 * Since we may already have a register holding the guest register value,
3766 * code will be emitted to do the loading if that's not the case. Code may also
3767 * be emitted if we have to free up a register to satisfy the request.
3768 *
3769 * @returns The host register number; throws VBox status code on failure, so no
3770 * need to check the return value.
3771 * @param pReNative The native recompile state.
3772 * @param poff Pointer to the variable with the code buffer
3773 * position. This will be updated if we need to move a
3774 * variable from register to stack in order to satisfy
3775 * the request.
3776 * @param enmGstReg The guest register that is to be updated.
3777 * @param enmIntendedUse How the caller will be using the host register.
3778 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3779 * register is okay (default). The ASSUMPTION here is
3780 * that the caller has already flushed all volatile
3781 * registers, so this is only applied if we allocate a
3782 * new register.
3783 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3784 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3785 */
3786DECL_HIDDEN_THROW(uint8_t)
3787iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3788 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3789 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3790{
3791#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3792 AssertMsg( fSkipLivenessAssert
3793 || pReNative->idxCurCall == 0
3794 || enmGstReg == kIemNativeGstReg_Pc
3795 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3796 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3797 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3798 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3799 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3800 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3801#endif
3802 RT_NOREF(fSkipLivenessAssert);
3803
3804 return iemNativeRegAllocTmpForGuestRegCommon(pReNative, poff, enmGstReg, enmIntendedUse, fNoVolatileRegs);
3805}
3806
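/*
 * Minimal usage sketch: shadow a guest GPR for updating, emit the modification
 * and flush the pending write explicitly.  kIemNativeGstReg_GprFirst is used
 * purely as an example register here; the snippet is hypothetical.
 */
#if 0 /* illustration only */
uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_GprFirst,
                                                          kIemNativeGstRegUse_ForUpdate);
/* ... emit code that modifies idxGstReg ... */
# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
off = iemNativeRegFlushDirtyGuest(pReNative, off, RT_BIT_64(kIemNativeGstReg_GprFirst));
# endif
iemNativeRegFreeTmp(pReNative, idxGstReg);
#endif
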
3807
3808#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
3809/**
3810 * Specialized version of iemNativeRegAllocTmpForGuestReg for EFLAGS.
3811 *
3812 * This takes additional arguments for covering liveness assertions in strict
3813 * builds, it's otherwise the same as iemNativeRegAllocTmpForGuestReg() with
3814 * kIemNativeGstReg_EFlags as argument.
3815 */
3816DECL_HIDDEN_THROW(uint8_t)
3817iemNativeRegAllocTmpForGuestEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREGUSE enmIntendedUse,
3818 uint64_t fRead, uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3819{
3820 if (pReNative->idxCurCall != 0 && (fRead || fWrite /*|| fPotentialCall*/))
3821 {
3822 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3823 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3824 Assert(!(fPotentialCall & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3825 uint64_t const fAll = fRead | fWrite /*| fPotentialCall*/;
3826 uint32_t fState;
3827# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
3828 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
3829 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
3830 || ( fRead & RT_BIT_64(a_enmGstEfl) \
3831 ? fWrite & RT_BIT_64(a_enmGstEfl) \
3832 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
3833 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
3834 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
3835 ) \
3836 , ("%s - %u\n", #a_enmGstEfl, fState))
3837 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
3838 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
3839 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
3840 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
3841 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
3842 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
3843 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
3844# undef MY_ASSERT_ONE_EFL
3845 }
3846 RT_NOREF(fPotentialCall);
3847 return iemNativeRegAllocTmpForGuestRegCommon(pReNative, poff, kIemNativeGstReg_EFlags,
3848 enmIntendedUse, false /*fNoVolatileRegs*/);
3849}
3850#endif
3851
3852
3853
3854/**
3855 * Common worker for iemNativeRegAllocTmpForGuestRegIfAlreadyPresent and
3856 * iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent.
3857 *
3858 * See iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() for details.
3859 */
3860DECL_FORCE_INLINE(uint8_t)
3861iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3862{
3863 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3864
3865 /*
3866 * First check if the guest register value is already in a host register.
3867 */
3868 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3869 {
3870 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3871 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3872 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3873 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3874
3875 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3876 {
3877 /*
3878 * We only do readonly use here, so easy compared to the other
3879 * variant of this code.
3880 */
3881 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3882 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3883 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3884 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3885 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3886
3887#ifdef VBOX_STRICT
3888 /* Strict builds: Check that the value is correct. */
3889 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3890#else
3891 RT_NOREF(poff);
3892#endif
3893 return idxReg;
3894 }
3895 }
3896
3897 return UINT8_MAX;
3898}
3899
3900
3901/**
3902 * Allocates a temporary host general purpose register that already holds the
3903 * given guest register value.
3904 *
3905 * The use case for this function is places where the shadowing state cannot be
3906 * modified due to branching and such. This will fail if we don't have a
3907 * current shadow copy handy or if it's incompatible. The only code that will
3908 * be emitted here is value checking code in strict builds.
3909 *
3910 * The intended use can only be readonly!
3911 *
3912 * @returns The host register number, UINT8_MAX if not present.
3913 * @param pReNative The native recompile state.
3914 * @param poff Pointer to the instruction buffer offset.
3915 * Will be updated in strict builds if a register is
3916 * found.
3917 * @param enmGstReg The guest register that is to be updated.
3918 * @note In strict builds, this may throw instruction buffer growth failures.
3919 * Non-strict builds will not throw anything.
3920 * @sa iemNativeRegAllocTmpForGuestReg
3921 */
3922DECL_HIDDEN_THROW(uint8_t)
3923iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3924{
3925#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3926 AssertMsg( pReNative->idxCurCall == 0
3927 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3928 || enmGstReg == kIemNativeGstReg_Pc
3929 , ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3930#endif
3931 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, enmGstReg);
3932}
3933
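/*
 * Usage sketch of the "if already present" variant: the return value must be
 * checked for UINT8_MAX and a fallback chosen when no shadow copy is handy.
 * Hypothetical snippet, not from an actual caller.
 */
#if 0 /* illustration only */
uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_EFlags);
if (idxEflReg != UINT8_MAX)
{
    /* ... read-only use of the cached EFLAGS value ... */
    iemNativeRegFreeTmp(pReNative, idxEflReg);
}
else
{
    /* ... no shadow copy around; load EFLAGS the regular way instead ... */
}
#endif
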
3934
3935#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
3936/**
3937 * Specialized version of iemNativeRegAllocTmpForGuestRegIfAlreadyPresent for
3938 * EFLAGS.
3939 *
3940 * This takes additional arguments for covering liveness assertions in strict
3941 * builds, it's otherwise the same as
3942 * iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() with
3943 * kIemNativeGstReg_EFlags as argument.
3944 *
3945 * @note The @a fWrite parameter is necessary to complete the liveness picture,
3946 * as iemNativeEmitFetchEFlags() may fetch flags in prep for a later
3947 * commit. If the operation clobbers all the flags, @a fRead will be
3948 * zero, so better verify the whole picture while we're here.
3949 */
3950DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
3951 uint64_t fRead, uint64_t fWrite /*=0*/)
3952{
3953 if (pReNative->idxCurCall != 0)
3954 {
3955 Assert(fRead | fWrite);
3956 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3957 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3958 uint64_t const fAll = fRead | fWrite;
3959 uint32_t fState;
3960# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
3961 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
3962 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
3963 || ( fRead & RT_BIT_64(a_enmGstEfl) \
3964 ? fWrite & RT_BIT_64(a_enmGstEfl) \
3965 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
3966 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
3967 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
3968 ) \
3969 , ("%s - %u\n", #a_enmGstEfl, fState))
3970 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
3971 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
3972 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
3973 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
3974 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
3975 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
3976 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
3977# undef MY_ASSERT_ONE_EFL
3978 }
3979 RT_NOREF(fRead);
3980 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, kIemNativeGstReg_EFlags);
3981}
3982#endif
3983
3984
3985/**
3986 * Allocates argument registers for a function call.
3987 *
3988 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3989 * need to check the return value.
3990 * @param pReNative The native recompile state.
3991 * @param off The current code buffer offset.
3992 * @param cArgs The number of arguments the function call takes.
3993 */
3994DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3995{
3996 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3997 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3998 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3999 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4000
4001 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4002 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4003 else if (cArgs == 0)
4004 return off;
4005
4006 /*
4007 * Do we get lucky and all registers are free and not shadowing anything?
4008 */
4009 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4010 for (uint32_t i = 0; i < cArgs; i++)
4011 {
4012 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4013 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4014 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4015 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4016 }
4017 /*
4018 * Okay, not lucky so we have to free up the registers.
4019 */
4020 else
4021 for (uint32_t i = 0; i < cArgs; i++)
4022 {
4023 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4024 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4025 {
4026 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4027 {
4028 case kIemNativeWhat_Var:
4029 {
4030 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4031 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4032 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4033 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4034 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4035#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4036 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4037#endif
4038
4039 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4040 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4041 else
4042 {
4043 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4044 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4045 }
4046 break;
4047 }
4048
4049 case kIemNativeWhat_Tmp:
4050 case kIemNativeWhat_Arg:
4051 case kIemNativeWhat_rc:
4052 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4053 default:
4054 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4055 }
4056
4057 }
4058 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4059 {
4060 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4061 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4062 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4063#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4064 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4065#endif
4066 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4067 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4068 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4069 }
4070 else
4071 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4072 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4073 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4074 }
4075 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4076 return off;
4077}
4078
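/*
 * Usage sketch: reserve the calling-convention argument registers before
 * loading them via g_aidxIemNativeCallRegs and emitting the call.  The value
 * and argument count below are hypothetical.
 */
#if 0 /* illustration only */
off = iemNativeRegAllocArgs(pReNative, off, 2 /*cArgs*/);
off = iemNativeEmitLoadGprImm64(pReNative, off, g_aidxIemNativeCallRegs[1], UINT64_C(0x42) /* second argument */);
/* ... load the remaining argument registers and emit the call ... */
#endif
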
4079
4080DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4081
4082
4083#if 0
4084/**
4085 * Frees a register assignment of any type.
4086 *
4087 * @param pReNative The native recompile state.
4088 * @param idxHstReg The register to free.
4089 *
4090 * @note Does not update variables.
4091 */
4092DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4093{
4094 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4095 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4096 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4097 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4098 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4099 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4100 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4101 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4102 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4103 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4104 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4105 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4106 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4107 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4108
4109 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4110 /* no flushing, right:
4111 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4112 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4113 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4114 */
4115}
4116#endif
4117
4118
4119/**
4120 * Frees a temporary register.
4121 *
4122 * Any shadow copies of guest registers assigned to the host register will not
4123 * be flushed by this operation.
4124 */
4125DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4126{
4127 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4128 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4129 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4130 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4131 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4132}
4133
4134
4135/**
4136 * Frees a temporary immediate register.
4137 *
4138 * It is assumed that the caller has not modified the register, so it still holds
4139 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4140 */
4141DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4142{
4143 iemNativeRegFreeTmp(pReNative, idxHstReg);
4144}
4145
4146
4147/**
4148 * Frees a register assigned to a variable.
4149 *
4150 * The register will be disassociated from the variable.
4151 */
4152DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4153{
4154 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4155 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4156 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4157 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4158 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4159#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4160 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4161#endif
4162
4163 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4164 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4165 if (!fFlushShadows)
4166 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4167 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4168 else
4169 {
4170 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4171 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4172#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4173 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
4174#endif
4175 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4176 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4177 uint64_t fGstRegShadows = fGstRegShadowsOld;
4178 while (fGstRegShadows)
4179 {
4180 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4181 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4182
4183 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4184 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4185 }
4186 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4187 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4188 }
4189}
4190
4191
4192#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4193# if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
4194/** Host CPU SIMD register names. */
4195DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4196{
4197# ifdef RT_ARCH_AMD64
4198 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4199# elif RT_ARCH_ARM64
4200 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4201 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4202# else
4203# error "port me"
4204# endif
4205};
4206# endif
4207
4208
4209/**
4210 * Frees a SIMD register assigned to a variable.
4211 *
4212 * The register will be disassociated from the variable.
4213 */
4214DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4215{
4216 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4217 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4218 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4219 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4220 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4221 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4222
4223 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4224 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4225 if (!fFlushShadows)
4226 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4227 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4228 else
4229 {
4230 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4231 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4232 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4233 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4234 uint64_t fGstRegShadows = fGstRegShadowsOld;
4235 while (fGstRegShadows)
4236 {
4237 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4238 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4239
4240 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4241 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4242 }
4243 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4244 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4245 }
4246}
4247
4248
4249/**
4250 * Reassigns a variable to a different SIMD register specified by the caller.
4251 *
4252 * @returns The new code buffer position.
4253 * @param pReNative The native recompile state.
4254 * @param off The current code buffer position.
4255 * @param idxVar The variable index.
4256 * @param idxRegOld The old host register number.
4257 * @param idxRegNew The new host register number.
4258 * @param pszCaller The caller for logging.
4259 */
4260static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4261 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4262{
4263 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4264 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4265 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4266 RT_NOREF(pszCaller);
4267
4268 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4269 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4270 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4271
4272 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4273 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4274 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4275
4276 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4277 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4279
4280 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4281 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4282 else
4283 {
4284 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4285 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4286 }
4287
4288 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4289 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4290 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
4291 if (fGstRegShadows)
4292 {
4293 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4294 | RT_BIT_32(idxRegNew);
4295 while (fGstRegShadows)
4296 {
4297 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4298 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4299
4300 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4301 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4302 }
4303 }
4304
4305 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4306 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4307 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4308 return off;
4309}
4310
4311
4312/**
4313 * Moves a variable to a different register or spills it onto the stack.
4314 *
4315 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4316 * kinds can easily be recreated if needed later.
4317 *
4318 * @returns The new code buffer position.
4319 * @param pReNative The native recompile state.
4320 * @param off The current code buffer position.
4321 * @param idxVar The variable index.
4322 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4323 * call-volatile registers.
4324 */
4325DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4326 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4327{
4328 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4329 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4330 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4331 Assert(!pVar->fRegAcquired);
4332 Assert(pVar->fSimdReg);
4333
4334 uint8_t const idxRegOld = pVar->idxReg;
4335 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4336 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4337 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4338 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4339 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4340 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
4341 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4342 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4343 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4344 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4345
4346 /** @todo Add statistics on this.*/
4347 /** @todo Implement basic variable liveness analysis (python) so variables
4348 * can be freed immediately once no longer used. Without it we risk needlessly
4349 * trashing registers and stack space on dead variables.
4350 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4351
4352 /*
4353 * First try move it to a different register, as that's cheaper.
4354 */
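 /* Never pick the register we are vacating nor any fixed-purpose SIMD register. */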
4355 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4356 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4357 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4358 if (fRegs)
4359 {
4360 /* Avoid using shadow registers, if possible. */
4361 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4362 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4363 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4364 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4365 }
4366
4367 /*
4368 * Otherwise we must spill the register onto the stack.
4369 */
4370 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4371 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4372 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4373
4374 if (pVar->cbVar == sizeof(RTUINT128U))
4375 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4376 else
4377 {
4378 Assert(pVar->cbVar == sizeof(RTUINT256U));
4379 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4380 }
4381
4382 pVar->idxReg = UINT8_MAX;
4383 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4384 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4385 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4386 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4387 return off;
4388}
4389
4390
4391/**
4392 * Called right before emitting a call instruction to move anything important
4393 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
4394 * optionally freeing argument variables.
4395 *
4396 * @returns New code buffer offset, UINT32_MAX on failure.
4397 * @param pReNative The native recompile state.
4398 * @param off The code buffer offset.
4399 * @param cArgs The number of arguments the function call takes.
4400 * It is presumed that the host register part of these has
4401 * been allocated as such already and won't need moving,
4402 * just freeing.
4403 * @param fKeepVars Mask of variables that should keep their register
4404 * assignments. Caller must take care to handle these.
4405 */
4406DECL_HIDDEN_THROW(uint32_t)
4407iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4408{
4409 Assert(!cArgs); RT_NOREF(cArgs);
4410
4411 /* fKeepVars will reduce this mask. */
4412 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4413
4414 /*
4415 * Move anything important out of volatile registers.
4416 */
4417 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4418#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4419 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4420#endif
4421 ;
4422
4423 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4424 if (!fSimdRegsToMove)
4425 { /* likely */ }
4426 else
4427 {
4428 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4429 while (fSimdRegsToMove != 0)
4430 {
4431 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4432 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4433
4434 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4435 {
4436 case kIemNativeWhat_Var:
4437 {
4438 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4439 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4440 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4441 Assert(pVar->idxReg == idxSimdReg);
4442 Assert(pVar->fSimdReg);
4443 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4444 {
4445 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4446 idxVar, pVar->enmKind, pVar->idxReg));
4447 if (pVar->enmKind != kIemNativeVarKind_Stack)
4448 pVar->idxReg = UINT8_MAX;
4449 else
4450 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4451 }
4452 else
4453 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4454 continue;
4455 }
4456
4457 case kIemNativeWhat_Arg:
4458 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4459 continue;
4460
4461 case kIemNativeWhat_rc:
4462 case kIemNativeWhat_Tmp:
4463 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4464 continue;
4465
4466 case kIemNativeWhat_FixedReserved:
4467#ifdef RT_ARCH_ARM64
4468 continue; /* On ARM the upper half of the virtual 256-bit register. */
4469#endif
4470
4471 case kIemNativeWhat_FixedTmp:
4472 case kIemNativeWhat_pVCpuFixed:
4473 case kIemNativeWhat_pCtxFixed:
4474 case kIemNativeWhat_PcShadow:
4475 case kIemNativeWhat_Invalid:
4476 case kIemNativeWhat_End:
4477 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4478 }
4479 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4480 }
4481 }
4482
4483 /*
4484 * Do the actual freeing.
4485 */
4486 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4487 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4488 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4489 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4490
4491 /* If there are guest register shadows in any call-volatile register, we
4492 have to clear the corresponding guest register masks for each register. */
4493 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4494 if (fHstSimdRegsWithGstShadow)
4495 {
4496 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4497 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4498 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4499 do
4500 {
4501 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4502 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4503
4504 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4505
4506#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4507 /*
4508 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4509 * to call volatile registers).
4510 */
4511 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4512 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4513 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4514#endif
4515 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4516 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4517
4518 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4519 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4520 } while (fHstSimdRegsWithGstShadow != 0);
4521 }
4522
4523 return off;
4524}
4525#endif
4526
4527
4528/**
4529 * Called right before emitting a call instruction to move anything important
4530 * out of call-volatile registers, free and flush the call-volatile registers,
4531 * optionally freeing argument variables.
4532 *
4533 * @returns New code buffer offset, UINT32_MAX on failure.
4534 * @param pReNative The native recompile state.
4535 * @param off The code buffer offset.
4536 * @param cArgs The number of arguments the function call takes.
4537 * It is presumed that the host register part of these has
4538 * been allocated as such already and won't need moving,
4539 * just freeing.
4540 * @param fKeepVars Mask of variables that should keep their register
4541 * assignments. Caller must take care to handle these.
4542 */
4543DECL_HIDDEN_THROW(uint32_t)
4544iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4545{
4546 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4547
4548 /* fKeepVars will reduce this mask. */
4549 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK;
4550
4551#ifdef RT_ARCH_ARM64
4552AssertCompile(IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK == UINT32_C(0x37fff));
4553#endif
4554
4555 /*
4556 * Move anything important out of volatile registers.
4557 */
4558 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4559 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
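 /* Argument registers carrying the first cArgs arguments are excluded below; per the contract above they only need freeing, not moving. */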
4560 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK
4561#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4562 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4563#endif
4564 & ~g_afIemNativeCallRegs[cArgs];
4565
4566 fRegsToMove &= pReNative->Core.bmHstRegs;
4567 if (!fRegsToMove)
4568 { /* likely */ }
4569 else
4570 {
4571 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4572 while (fRegsToMove != 0)
4573 {
4574 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4575 fRegsToMove &= ~RT_BIT_32(idxReg);
4576
4577 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4578 {
4579 case kIemNativeWhat_Var:
4580 {
4581 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4582 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4583 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4584 Assert(pVar->idxReg == idxReg);
4585#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4586 Assert(!pVar->fSimdReg);
4587#endif
4588 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4589 {
4590 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4591 idxVar, pVar->enmKind, pVar->idxReg));
4592 if (pVar->enmKind != kIemNativeVarKind_Stack)
4593 pVar->idxReg = UINT8_MAX;
4594 else
4595 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4596 }
4597 else
4598 fRegsToFree &= ~RT_BIT_32(idxReg);
4599 continue;
4600 }
4601
4602 case kIemNativeWhat_Arg:
4603 AssertMsgFailed(("What?!?: %u\n", idxReg));
4604 continue;
4605
4606 case kIemNativeWhat_rc:
4607 case kIemNativeWhat_Tmp:
4608 AssertMsgFailed(("Missing free: %u\n", idxReg));
4609 continue;
4610
4611 case kIemNativeWhat_FixedTmp:
4612 case kIemNativeWhat_pVCpuFixed:
4613 case kIemNativeWhat_pCtxFixed:
4614 case kIemNativeWhat_PcShadow:
4615 case kIemNativeWhat_FixedReserved:
4616 case kIemNativeWhat_Invalid:
4617 case kIemNativeWhat_End:
4618 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4619 }
4620 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4621 }
4622 }
4623
4624 /*
4625 * Do the actual freeing.
4626 */
4627 if (pReNative->Core.bmHstRegs & fRegsToFree)
4628 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4629 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4630 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4631
4632 /* If there are guest register shadows in any call-volatile register, we
4633 have to clear the corresponding guest register masks for each register. */
4634 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4635 if (fHstRegsWithGstShadow)
4636 {
4637 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4638 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
4639 fHstRegsWithGstShadow));
4640 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4641 do
4642 {
4643 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4644 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4645
4646 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4647
4648#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4649 /*
4650 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4651 * to call volatile registers).
4652 */
4653 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4654 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4655 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4656#endif
4657
4658 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4659 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4660 } while (fHstRegsWithGstShadow != 0);
4661 }
4662
4663#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4664 /* Now for the SIMD registers, no argument support for now. */
4665 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4666#endif
4667
4668 return off;
4669}
4670
4671
4672/**
4673 * Flushes a set of guest register shadow copies.
4674 *
4675 * This is usually done after calling a threaded function or a C-implementation
4676 * of an instruction.
4677 *
4678 * @param pReNative The native recompile state.
4679 * @param fGstRegs Set of guest registers to flush.
4680 */
4681DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4682{
4683 /*
4684 * Reduce the mask by what's currently shadowed
4685 */
4686 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4687 fGstRegs &= bmGstRegShadowsOld;
4688 if (fGstRegs)
4689 {
4690 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4691 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4692 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4693 if (bmGstRegShadowsNew)
4694 {
4695 /*
4696 * Partial.
4697 */
4698 do
4699 {
4700 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4701 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4702 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4703 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4704 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4705#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4706 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4707#endif
4708
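 /* Drop, in one go, every guest shadow of this host register that is in the flush set (the bit we just scanned included). */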
4709 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4710 fGstRegs &= ~fInThisHstReg;
4711 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4712 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4713 if (!fGstRegShadowsNew)
4714 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4715 } while (fGstRegs != 0);
4716 }
4717 else
4718 {
4719 /*
4720 * Clear all.
4721 */
4722 do
4723 {
4724 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4725 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4726 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4727 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4728 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4729#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4730 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4731#endif
4732
4733 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4734 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4735 } while (fGstRegs != 0);
4736 pReNative->Core.bmHstRegsWithGstShadow = 0;
4737 }
4738 }
4739}
4740
4741
4742/**
4743 * Flushes guest register shadow copies held by a set of host registers.
4744 *
4745 * This is used with the TLB lookup code for ensuring that we don't carry on
4746 * with any guest shadows in volatile registers, as these will get corrupted by
4747 * a TLB miss.
4748 *
4749 * @param pReNative The native recompile state.
4750 * @param fHstRegs Set of host registers to flush guest shadows for.
4751 */
4752DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4753{
4754 /*
4755 * Reduce the mask by what's currently shadowed.
4756 */
4757 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4758 fHstRegs &= bmHstRegsWithGstShadowOld;
4759 if (fHstRegs)
4760 {
4761 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4762 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4763 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4764 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4765 if (bmHstRegsWithGstShadowNew)
4766 {
4767 /*
4768 * Partial (likely).
4769 */
4770 uint64_t fGstShadows = 0;
4771 do
4772 {
4773 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4774 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4775 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4776 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4777#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4778 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4779#endif
4780
4781 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4782 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4783 fHstRegs &= ~RT_BIT_32(idxHstReg);
4784 } while (fHstRegs != 0);
4785 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4786 }
4787 else
4788 {
4789 /*
4790 * Clear all.
4791 */
4792 do
4793 {
4794 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4795 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4796 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4797 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4798#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4799 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4800#endif
4801
4802 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4803 fHstRegs &= ~RT_BIT_32(idxHstReg);
4804 } while (fHstRegs != 0);
4805 pReNative->Core.bmGstRegShadows = 0;
4806 }
4807 }
4808}
4809
4810
4811/**
4812 * Restores guest shadow copies in volatile registers.
4813 *
4814 * This is used after calling a helper function (think TLB miss) to restore the
4815 * register state of volatile registers.
4816 *
4817 * @param pReNative The native recompile state.
4818 * @param off The code buffer offset.
4819 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4820 * be active (allocated) w/o asserting. Hack.
4821 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4822 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4823 */
4824DECL_HIDDEN_THROW(uint32_t)
4825iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4826{
4827 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4828 if (fHstRegs)
4829 {
4830 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4831 do
4832 {
4833 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4834
4835 /* It's not fatal if a register is active holding a variable that
4836 shadows a guest register, ASSUMING all pending guest register
4837 writes were flushed prior to the helper call. However, we'll be
4838 emitting duplicate restores, so it wastes code space. */
4839 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4840 RT_NOREF(fHstRegsActiveShadows);
4841
4842 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4843#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4844 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4845#endif
4846 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4847 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4848 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4849
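 /* All guest registers shadowed by this host register hold the same value, so reloading any one of them restores the host register. */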
4850 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4851 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4852
4853 fHstRegs &= ~RT_BIT_32(idxHstReg);
4854 } while (fHstRegs != 0);
4855 }
4856 return off;
4857}
4858
4859
4860
4861
4862/*********************************************************************************************************************************
4863* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4864*********************************************************************************************************************************/
4865#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4866
4867/**
4868 * Info about shadowed guest SIMD register values.
4869 * @see IEMNATIVEGSTSIMDREG
4870 */
4871static struct
4872{
4873 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4874 uint32_t offXmm;
4875 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4876 uint32_t offYmm;
4877 /** Name (for logging). */
4878 const char *pszName;
4879} const g_aGstSimdShadowInfo[] =
4880{
4881#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4882 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4883 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4884 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4885 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4886 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4887 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4888 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4889 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4890 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4891 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4892 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4893 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4894 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4895 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4896 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4897 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4898 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4899#undef CPUMCTX_OFF_AND_SIZE
4900};
4901AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4902
4903
4904/**
4905 * Frees a temporary SIMD register.
4906 *
4907 * Any shadow copies of guest registers assigned to the host register will not
4908 * be flushed by this operation.
4909 */
4910DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4911{
4912 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4913 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4914 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4915 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4916 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4917}
4918
4919
4920/**
4921 * Emits code to flush a pending write of the given SIMD register if any, and also flushes the guest to host SIMD register association.
4922 *
4923 * @returns New code buffer offset.
4924 * @param pReNative The native recompile state.
4925 * @param off Current code buffer position.
4926 * @param enmGstSimdReg The guest SIMD register to flush.
4927 */
4928DECL_HIDDEN_THROW(uint32_t)
4929iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4930{
4931 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4932
4933 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4934 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4935 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4936 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4937
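 /* Store whichever 128-bit halves are dirty back into CPUMCTX (XMM low half / YmmHi high half), then clear the dirty bits. */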
4938 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4939 {
4940 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4941 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4942 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4943 }
4944
4945 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4946 {
4947 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4948 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4949 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4950 }
4951
4952 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4953 return off;
4954}
4955
4956
4957/**
4958 * Flush the given set of guest SIMD registers if marked as dirty.
4959 *
4960 * @returns New code buffer offset.
4961 * @param pReNative The native recompile state.
4962 * @param off Current code buffer position.
4963 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
4964 */
4965DECL_HIDDEN_THROW(uint32_t)
4966iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4967{
4968 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4969 & fFlushGstSimdReg;
4970 if (bmGstSimdRegShadowDirty)
4971 {
4972# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4973 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4974 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4975# endif
4976
4977 do
4978 {
4979 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4980 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4981 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4982 } while (bmGstSimdRegShadowDirty);
4983 }
4984
4985 return off;
4986}
4987
4988
4989#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4990/**
4991 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
4992 *
4993 * @returns New code buffer offset.
4994 * @param pReNative The native recompile state.
4995 * @param off Current code buffer position.
4996 * @param idxHstSimdReg The host SIMD register.
4997 *
4998 * @note This doesn't do any unshadowing of guest registers from the host register.
4999 */
5000DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
5001{
5002 /* We need to flush any pending guest register writes this host register shadows. */
5003 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5004 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5005 if (bmGstSimdRegShadowDirty)
5006 {
5007# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5008 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5009 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5010# endif
5011
5012 do
5013 {
5014 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5015 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5016 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5017 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5018 } while (bmGstSimdRegShadowDirty);
5019 }
5020
5021 return off;
5022}
5023#endif
5024
5025
5026/**
5027 * Locate a register, possibly freeing one up.
5028 *
5029 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5030 * failed.
5031 *
5032 * @returns Host register number on success. Returns UINT8_MAX if no register
5033 * was found; the caller is supposed to deal with this and raise an
5034 * allocation type specific status code (if desired).
5035 *
5036 * @throws VBox status code if we run into trouble spilling a variable or
5037 * recording debug info. Does NOT throw anything if we're out of
5038 * registers, though.
5039 */
5040static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5041 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5042{
5043 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5044 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5045 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5046
5047 /*
5048 * Try a freed register that's shadowing a guest register.
5049 */
5050 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5051 if (fRegs)
5052 {
5053 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5054
5055#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5056 /*
5057 * When we have liveness information, we use it to kick out all shadowed
5058 * guest registers that will not be needed any more in this TB. If we're
5059 * lucky, this may prevent us from ending up here again.
5060 *
5061 * Note! We must consider the previous entry here so we don't free
5062 * anything that the current threaded function requires (current
5063 * entry is produced by the next threaded function).
5064 */
5065 uint32_t const idxCurCall = pReNative->idxCurCall;
5066 if (idxCurCall > 0)
5067 {
5068 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5069 uint64_t const fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
5070
5071 /* If it matches any shadowed registers. */
5072 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5073 {
5074 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5075 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5076 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5077
5078 /* See if we've got any unshadowed registers we can return now. */
5079 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5080 if (fUnshadowedRegs)
5081 {
5082 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
5083 return (fPreferVolatile
5084 ? ASMBitFirstSetU32(fUnshadowedRegs)
5085 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5086 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5087 - 1;
5088 }
5089 }
5090 }
5091#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5092
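 /* When preferring volatile registers simply take the lowest set bit; otherwise pick the highest non-volatile candidate, falling back to whatever is free. */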
5093 unsigned const idxReg = (fPreferVolatile
5094 ? ASMBitFirstSetU32(fRegs)
5095 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5096 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5097 - 1;
5098
5099 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5100 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5101 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5102 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5103
5104 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5105 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
5106
5107 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5108 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5109 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5110 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5111 return idxReg;
5112 }
5113
5114 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5115
5116 /*
5117 * Try to free up a variable that's in a register.
5118 *
5119 * We do two rounds here: first we evacuate variables we don't need to be
5120 * saved on the stack, then in the second round we move things to the stack.
5121 */
5122 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5123 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5124 {
5125 uint32_t fVars = pReNative->Core.bmVars;
5126 while (fVars)
5127 {
5128 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5129 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5130 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
5131 { fVars &= ~RT_BIT_32(idxVar); continue; } /* (clear the bit, or we would spin on it forever) */
5132
5133 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5134 && (RT_BIT_32(idxReg) & fRegMask)
5135 && ( iLoop == 0
5136 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5137 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5138 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5139 {
5140 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5141 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5142 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5143 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5144 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5145 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5146
5147 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5148 {
5149 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5150 *poff = pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U)
? iemNativeEmitStoreVecRegByBpU128(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg)
: iemNativeEmitStoreVecRegByBpU256(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5151 }
5152
5153 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5154 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5155
5156 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5157 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5158 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5159 return idxReg;
5160 }
5161 fVars &= ~RT_BIT_32(idxVar);
5162 }
5163 }
5164
5165 AssertFailed();
5166 return UINT8_MAX;
5167}
5168
5169
5170/**
5171 * Flushes a set of guest register shadow copies.
5172 *
5173 * This is usually done after calling a threaded function or a C-implementation
5174 * of an instruction.
5175 *
5176 * @param pReNative The native recompile state.
5177 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5178 */
5179DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5180{
5181 /*
5182 * Reduce the mask by what's currently shadowed
5183 */
5184 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5185 fGstSimdRegs &= bmGstSimdRegShadows;
5186 if (fGstSimdRegs)
5187 {
5188 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5189 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5190 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5191 if (bmGstSimdRegShadowsNew)
5192 {
5193 /*
5194 * Partial.
5195 */
5196 do
5197 {
5198 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5199 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5200 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5201 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5202 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5203 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5204
5205 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5206 fGstSimdRegs &= ~fInThisHstReg;
5207 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5208 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5209 if (!fGstRegShadowsNew)
5210 {
5211 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5212 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5213 }
5214 } while (fGstSimdRegs != 0);
5215 }
5216 else
5217 {
5218 /*
5219 * Clear all.
5220 */
5221 do
5222 {
5223 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5224 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5225 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5226 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5227 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5228 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5229
5230 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5231 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5232 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5233 } while (fGstSimdRegs != 0);
5234 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5235 }
5236 }
5237}
5238
5239
5240/**
5241 * Allocates a temporary host SIMD register.
5242 *
5243 * This may emit code to save register content onto the stack in order to free
5244 * up a register.
5245 *
5246 * @returns The host register number; throws VBox status code on failure,
5247 * so no need to check the return value.
5248 * @param pReNative The native recompile state.
5249 * @param poff Pointer to the variable with the code buffer position.
5250 * This will be updated if we need to move a variable from
5251 * register to stack in order to satisfy the request.
5252 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5253 * registers (@c true, default) or the other way around
5254 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5255 */
5256DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5257{
5258 /*
5259 * Try find a completely unused register, preferably a call-volatile one.
5260 */
5261 uint8_t idxSimdReg;
5262 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5263 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5264 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5265 if (fRegs)
5266 {
5267 if (fPreferVolatile)
5268 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5269 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5270 else
5271 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5272 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5273 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5274 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5275
5276 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5277 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5278 }
5279 else
5280 {
5281 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5282 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5283 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5284 }
5285
5286 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5287 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5288}
5289
5290
5291/**
5292 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5293 * registers.
5294 *
5295 * @returns The host register number; throws VBox status code on failure,
5296 * so no need to check the return value.
5297 * @param pReNative The native recompile state.
5298 * @param poff Pointer to the variable with the code buffer position.
5299 * This will be updated if we need to move a variable from
5300 * register to stack in order to satisfy the request.
5301 * @param fRegMask Mask of acceptable registers.
5302 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5303 * registers (@c true, default) or the other way around
5304 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5305 */
5306DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5307 bool fPreferVolatile /*= true*/)
5308{
5309 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5310 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5311
5312 /*
5313 * Try find a completely unused register, preferably a call-volatile one.
5314 */
5315 uint8_t idxSimdReg;
5316 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5317 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5318 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5319 & fRegMask;
5320 if (fRegs)
5321 {
5322 if (fPreferVolatile)
5323 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5324 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5325 else
5326 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5327 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5328 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5329 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5330
5331 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5332 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5333 }
5334 else
5335 {
5336 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5337 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5338 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5339 }
5340
5341 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5342 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5343}
5344
5345
5346/**
5347 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5348 *
5349 * @param pReNative The native recompile state.
5350 * @param idxHstSimdReg The host SIMD register to update the state for.
5351 * @param enmLoadSz The load size to set.
5352 */
5353DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5354 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5355{
5356 /* Everything valid already? -> nothing to do. */
5357 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5358 return;
5359
5360 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5361 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5362 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5363 {
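 /* Combining a Low128 load with a High128 load (in either order) yields a fully loaded 256-bit register. */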
5364 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5365 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5366 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5367 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5368 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5369 }
5370}
5371
5372
5373static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5374 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5375{
5376 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5377 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5378 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5379 {
5380# ifdef RT_ARCH_ARM64
5381 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5382 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5383# endif
5384
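 /* If source and destination are the same host register, the required range is already loaded and there is nothing to emit. */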
5385 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5386 {
5387 switch (enmLoadSzDst)
5388 {
5389 case kIemNativeGstSimdRegLdStSz_256:
5390 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5391 break;
5392 case kIemNativeGstSimdRegLdStSz_Low128:
5393 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5394 break;
5395 case kIemNativeGstSimdRegLdStSz_High128:
5396 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5397 break;
5398 default:
5399 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5400 }
5401
5402 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5403 }
5404 }
5405 else
5406 {
5407 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5408 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5409 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5410 }
5411
5412 return off;
5413}
5414
5415
5416/**
5417 * Allocates a temporary host SIMD register for keeping a guest
5418 * SIMD register value.
5419 *
5420 * Since we may already have a register holding the guest register value,
5421 * code will be emitted to do the loading if that's not the case. Code may also
5422 * be emitted if we have to free up a register to satisfy the request.
5423 *
5424 * @returns The host register number; throws VBox status code on failure, so no
5425 * need to check the return value.
5426 * @param pReNative The native recompile state.
5427 * @param poff Pointer to the variable with the code buffer
5428 * position. This will be updated if we need to move a
5429 * variable from register to stack in order to satisfy
5430 * the request.
5431 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5432 * @param enmIntendedUse How the caller will be using the host register.
5433 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5434 * register is okay (default). The ASSUMPTION here is
5435 * that the caller has already flushed all volatile
5436 * registers, so this is only applied if we allocate a
5437 * new register.
5438 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5439 */
5440DECL_HIDDEN_THROW(uint8_t)
5441iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5442 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5443 bool fNoVolatileRegs /*= false*/)
5444{
5445 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5446#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5447 AssertMsg( pReNative->idxCurCall == 0
5448 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5449 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5450 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5451 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5452 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5453 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5454#endif
5455#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5456 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5457#endif
5458 uint32_t const fRegMask = !fNoVolatileRegs
5459 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5460 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5461
5462 /*
5463 * First check if the guest register value is already in a host register.
5464 */
5465 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5466 {
5467 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5468 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5469 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5470 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5471
5472 /* It's not supposed to be allocated... */
5473 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5474 {
5475 /*
5476 * If the register will trash the guest shadow copy, try find a
5477 * completely unused register we can use instead. If that fails,
5478 * we need to disassociate the host reg from the guest reg.
5479 */
5480 /** @todo would be nice to know if preserving the register is in any way helpful. */
5481 /* If the purpose is calculations, try to duplicate the register value as
5482 we'll be clobbering the shadow. */
5483 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5484 && ( ~pReNative->Core.bmHstSimdRegs
5485 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5486 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5487 {
5488 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5489
5490 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5491
5492 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5493 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5494 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5495 idxSimdReg = idxRegNew;
5496 }
5497 /* If the current register matches the restrictions, go ahead and allocate
5498 it for the caller. */
5499 else if (fRegMask & RT_BIT_32(idxSimdReg))
5500 {
5501 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5502 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5503 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5504 {
5505 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5506 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5507 else
5508 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5509 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5510 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5511 }
5512 else
5513 {
5514 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5515 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5516 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5517 }
5518 }
5519 /* Otherwise, allocate a register that satisfies the caller and transfer
5520 the shadowing if compatible with the intended use. (This basically
5521 means the caller wants a non-volatile register (RSP push/pop scenario).) */
5522 else
5523 {
5524 Assert(fNoVolatileRegs);
5525 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5526 !fNoVolatileRegs
5527 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5528 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5529 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5530 {
5531 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5532 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transfering %s to %s for guest %s %s\n",
5533 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5534 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5535 }
5536 else
5537 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5538 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5539 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5540 idxSimdReg = idxRegNew;
5541 }
5542 }
5543 else
5544 {
5545 /*
5546 * Oops. Shadowed guest register already allocated!
5547 *
5548 * Allocate a new register, copy the value and, if updating, the
5549 * guest shadow copy assignment to the new register.
5550 */
5551 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5552 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5553 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5554 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5555
5556 /** @todo share register for readonly access. */
5557 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5558 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5559
5560 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5561 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5562 else
5563 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5564
5565 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5566 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5567 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5568 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5569 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5570 else
5571 {
5572 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5573 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5574 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5575 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5576 }
5577 idxSimdReg = idxRegNew;
5578 }
5579 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5580
5581#ifdef VBOX_STRICT
5582 /* Strict builds: Check that the value is correct. */
5583 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5584 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5585#endif
5586
5587 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5588 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5589 {
5590# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5591 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5592 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5593# endif
5594
5595 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5596 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5597 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5598 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5599 else
5600 {
5601 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5602 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5603 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5604 }
5605 }
5606
5607 return idxSimdReg;
5608 }
5609
5610 /*
5611 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5612 */
5613 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5614
5615 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5616 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5617 else
5618 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5619
5620 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5621 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5622
5623 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5624 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5625 {
5626# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5627 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5628 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5629# endif
5630
5631 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5632 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5633 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5634 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5635 else
5636 {
5637 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5638 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5639 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5640 }
5641 }
5642
5643 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5644 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5645
5646 return idxRegNew;
5647}
5648
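#if 0 /* Example, never compiled. */
/*
 * Illustrative only: a much simplified, standalone sketch of the allocation decision
 * implemented by iemNativeSimdRegAllocTmpForGuestSimdReg above.  All types and helpers
 * here (EXAMPLEUSE, pfnAllocTmp, pfnLoad) are hypothetical stand-ins; the real code also
 * handles register masks, volatility restrictions, shadow transfers and dirty tracking.
 */
# include <stdbool.h>
# include <stdint.h>

typedef enum EXAMPLEUSE
{
    kExampleUse_ReadOnly,
    kExampleUse_ForUpdate,
    kExampleUse_ForFullWrite,
    kExampleUse_Calculation
} EXAMPLEUSE;

static uint8_t exampleSimdRegAllocForGuestReg(bool fShadowed, uint8_t idxShadowing, EXAMPLEUSE enmUse,
                                              uint8_t (*pfnAllocTmp)(void), void (*pfnLoad)(uint8_t idxHstReg))
{
    if (fShadowed)
    {
        if (enmUse != kExampleUse_Calculation)
            return idxShadowing;                 /* Reuse the existing shadow copy. */
        uint8_t const idxRegNew = pfnAllocTmp(); /* Destructive use: work on a duplicate so the shadow survives. */
        pfnLoad(idxRegNew);
        return idxRegNew;
    }
    uint8_t const idxRegNew = pfnAllocTmp();     /* Nothing shadowed yet: allocate a register ... */
    if (enmUse != kExampleUse_ForFullWrite)
        pfnLoad(idxRegNew);                      /* ... and load the guest value unless it will be fully overwritten. */
    return idxRegNew;
}
#endif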
5649
5650/**
5651 * Flushes guest SIMD register shadow copies held by a set of host registers.
5652 *
5653 * This is used when calling an external helper to ensure that we don't carry on
5654 * with any guest shadows in volatile registers, as these will be clobbered by the callee.
5655 *
5656 * @param pReNative The native recompile state.
5657 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5658 */
5659DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5660{
5661 /*
5662 * Reduce the mask by what's currently shadowed.
5663 */
5664 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5665 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5666 if (fHstSimdRegs)
5667 {
5668 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5669 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5670 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5671 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5672 if (bmHstSimdRegsWithGstShadowNew)
5673 {
5674 /*
5675 * Partial (likely).
5676 */
5677 uint64_t fGstShadows = 0;
5678 do
5679 {
5680 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5681 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5682 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5683 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5684 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5685 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5686
5687 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5688 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5689 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5690 } while (fHstSimdRegs != 0);
5691 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5692 }
5693 else
5694 {
5695 /*
5696 * Clear all.
5697 */
5698 do
5699 {
5700 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5701 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5702 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5703 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5704 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5705 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5706
5707 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5708 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5709 } while (fHstSimdRegs != 0);
5710 pReNative->Core.bmGstSimdRegShadows = 0;
5711 }
5712 }
5713}
5714#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
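
#if 0 /* Example, never compiled. */
/*
 * Illustrative only: a minimal, standalone sketch of the set-bit iteration pattern used by
 * the flush routine above to visit each host register in a mask.  exampleBitFirstSet is a
 * hypothetical stand-in for ASMBitFirstSetU32(x) - 1; the real code also clears the guest
 * shadow bookkeeping for every register it visits.
 */
# include <stdint.h>
# include <stdio.h>

static unsigned exampleBitFirstSet(uint32_t fMask)
{
    /* Returns the 0-based index of the least significant set bit; caller guarantees fMask != 0. */
    unsigned iBit = 0;
    while (!(fMask & UINT32_C(1)))
    {
        fMask >>= 1;
        iBit++;
    }
    return iBit;
}

int main(void)
{
    uint32_t fHstRegs = UINT32_C(0x0000a050); /* example mask of host registers to flush */
    while (fHstRegs != 0)
    {
        unsigned const idxHstReg = exampleBitFirstSet(fHstRegs);
        printf("flushing shadows held by host register %u\n", idxHstReg);
        fHstRegs &= ~(UINT32_C(1) << idxHstReg); /* clear the bit we just handled */
    }
    return 0;
}
#endif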
5715
5716
5717
5718/*********************************************************************************************************************************
5719* Code emitters for flushing pending guest register writes and sanity checks *
5720*********************************************************************************************************************************/
5721
5722#ifdef VBOX_STRICT
5723/**
5724 * Does internal register allocator sanity checks.
5725 */
5726DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5727{
5728 /*
5729 * Iterate host registers building a guest shadowing set.
5730 */
5731 uint64_t bmGstRegShadows = 0;
5732 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5733 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5734 while (bmHstRegsWithGstShadow)
5735 {
5736 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5737 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5738 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5739
5740 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5741 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5742 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5743 bmGstRegShadows |= fThisGstRegShadows;
5744 while (fThisGstRegShadows)
5745 {
5746 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5747 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5748 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5749 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5750 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5751 }
5752 }
5753 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5754 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5755 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5756
5757 /*
5758 * Now the other way around, checking the guest to host index array.
5759 */
5760 bmHstRegsWithGstShadow = 0;
5761 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5762 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5763 while (bmGstRegShadows)
5764 {
5765 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5766 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5767 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5768
5769 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5770 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5771 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5772 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5773 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5774 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5775 }
5776 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5777 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5778 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5779}
5780#endif /* VBOX_STRICT */
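
#if 0 /* Example, never compiled. */
/*
 * Illustrative only: a standalone sketch of the two-way mapping invariant that
 * iemNativeRegAssertSanity above verifies, using made-up array sizes and plain asserts.
 * EXAMPLESTATE and its fields are hypothetical stand-ins for the recompiler core state.
 */
# include <stdint.h>
# include <assert.h>

# define EX_CNT_HST_REGS 8   /* hypothetical number of host registers */
# define EX_CNT_GST_REGS 16  /* hypothetical number of guest registers */

typedef struct EXAMPLESTATE
{
    uint32_t afGstShadows[EX_CNT_HST_REGS]; /* per host register: mask of guest registers it shadows */
    uint8_t  aidxHstReg[EX_CNT_GST_REGS];   /* per guest register: index of the host register shadowing it */
    uint32_t bmGstRegShadows;               /* global mask of shadowed guest registers */
} EXAMPLESTATE;

static void exampleAssertSanity(EXAMPLESTATE const *pState)
{
    /* Forward direction: every shadow a host register claims must be recorded in the
       guest-to-host index array, and the union must equal the global bitmap. */
    uint32_t bmFromHst = 0;
    for (unsigned idxHstReg = 0; idxHstReg < EX_CNT_HST_REGS; idxHstReg++)
    {
        uint32_t const fThis = pState->afGstShadows[idxHstReg];
        bmFromHst |= fThis;
        for (unsigned idxGstReg = 0; idxGstReg < EX_CNT_GST_REGS; idxGstReg++)
            if (fThis & (UINT32_C(1) << idxGstReg))
                assert(pState->aidxHstReg[idxGstReg] == idxHstReg);
    }
    assert(bmFromHst == pState->bmGstRegShadows);

    /* Backward direction: every guest register marked as shadowed must point at a host
       register that in turn claims to shadow it. */
    for (unsigned idxGstReg = 0; idxGstReg < EX_CNT_GST_REGS; idxGstReg++)
        if (pState->bmGstRegShadows & (UINT32_C(1) << idxGstReg))
        {
            uint8_t const idxHstReg = pState->aidxHstReg[idxGstReg];
            assert(idxHstReg < EX_CNT_HST_REGS);
            assert(pState->afGstShadows[idxHstReg] & (UINT32_C(1) << idxGstReg));
        }
}
#endif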
5781
5782
5783/**
5784 * Flushes any delayed guest register writes.
5785 *
5786 * This must be called prior to calling CImpl functions and any helpers that use
5787 * the guest state (like raising exceptions) and such.
5788 *
5789 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5790 * the caller if it wishes to do so.
5791 */
5792DECL_HIDDEN_THROW(uint32_t)
5793iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5794{
5795#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5796 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5797 off = iemNativeEmitPcWriteback(pReNative, off);
5798#else
5799 RT_NOREF(pReNative, fGstShwExcept);
5800#endif
5801
5802#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5803 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5804#endif
5805
5806#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5807 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5808#endif
5809
5810 return off;
5811}
5812
5813#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5814
5815# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5816
5817/**
5818 * Checks if the value in @a idxPcReg matches IEMCPU::uPcUpdatingDebug.
5819 */
5820DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheckWithReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxPcReg)
5821{
5822 Assert(idxPcReg != IEMNATIVE_REG_FIXED_TMP0);
5823 Assert(pReNative->Core.fDebugPcInitialized);
5824
5825 /* cmp [pVCpu->iem.s.uPcUpdatingDebug], pcreg */
5826# ifdef RT_ARCH_AMD64
5827 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5828 pCodeBuf[off++] = X86_OP_REX_W | (idxPcReg >= 8 ? X86_OP_REX_R : 0);
5829 pCodeBuf[off++] = 0x3b;
5830 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxPcReg & 7, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5831# else
5832 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5833 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5834 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, idxPcReg);
5835# endif
5836
5837 uint32_t offFixup = off;
5838 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, kIemNativeInstrCond_e);
5839 off = iemNativeEmitBrkEx(pCodeBuf, off, UINT32_C(0x2200));
5840 iemNativeFixupFixedJump(pReNative, offFixup, off);
5841
5842 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5843 return off;
5844}
5845
5846
5847/**
5848 * Checks that the current RIP+offPc matches IEMCPU::uPcUpdatingDebug.
5849 */
5850DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5851{
5852 if (pReNative->Core.fDebugPcInitialized)
5853 {
5854 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc);
5855 if (pReNative->Core.offPc)
5856 {
5857 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5858 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 8);
5859 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxTmpReg, idxPcReg, pReNative->Core.offPc);
5860 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5861 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxTmpReg);
5862 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5863 }
5864 else
5865 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
5866 iemNativeRegFreeTmp(pReNative, idxPcReg);
5867 }
5868 return off;
5869}
5870
5871# endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG */
5872
5873/**
5874 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
5875 */
5876DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5877{
5878 Assert(pReNative->Core.offPc);
5879# if !defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && !defined(VBOX_WITH_STATISTICS)
5880 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x\n", pReNative->Core.offPc, off));
5881# else
5882 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
5883 uint8_t idxCurCall = pReNative->idxCurCall;
5884 uint8_t idxInstr = pReNative->pTbOrg->Thrd.paCalls[idxCurCall].idxInstr; /* unreliable */
5885 while (idxInstr == 0 && idxInstr + 1 < idxOldInstrPlusOne && idxCurCall > 0)
5886 idxInstr = pReNative->pTbOrg->Thrd.paCalls[--idxCurCall].idxInstr;
5887 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
5888 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
5889 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u\n",
5890 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped));
5891
5892 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
5893
5894# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5895 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5896 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
5897# endif
5898# endif
5899
5900# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5901 /* Allocate a temporary PC register. */
5902 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5903
5904 /* Perform the addition and store the result. */
5905 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5906 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5907# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5908 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
5909# endif
5910
5911 /* Free but don't flush the PC register. */
5912 iemNativeRegFreeTmp(pReNative, idxPcReg);
5913# else
5914 /* Compare the shadow with the context value, they should match. */
5915 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5916 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5917# endif
5918
5919 pReNative->Core.offPc = 0;
5920
5921 return off;
5922}
5923
5924#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
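
#if 0 /* Example, never compiled. */
/*
 * Illustrative only: a minimal, standalone sketch of the delayed PC updating scheme the
 * emitters above implement in native code.  EXAMPLECPU and its fields are hypothetical;
 * the real code tracks the pending advance in pReNative->Core.offPc at recompile time and
 * adds it to CPUMCTX.rip in the generated writeback code.
 */
# include <stdint.h>
# include <assert.h>

typedef struct EXAMPLECPU
{
    uint64_t rip;    /* architectural PC, only updated on writeback */
    uint64_t offPc;  /* bytes advanced since the last writeback */
} EXAMPLECPU;

static void exampleAdvancePc(EXAMPLECPU *pCpu, uint8_t cbInstr)
{
    /* Instead of updating rip after every instruction, just accumulate the advance. */
    pCpu->offPc += cbInstr;
}

static void exampleFlushPc(EXAMPLECPU *pCpu)
{
    /* Flush before anything that needs an accurate rip: helper calls, exceptions, TB exits. */
    pCpu->rip  += pCpu->offPc;
    pCpu->offPc = 0;
}

int main(void)
{
    EXAMPLECPU Cpu = { UINT64_C(0x1000), 0 };
    exampleAdvancePc(&Cpu, 3);
    exampleAdvancePc(&Cpu, 5);
    assert(Cpu.rip == UINT64_C(0x1000)); /* not written back yet */
    exampleFlushPc(&Cpu);
    assert(Cpu.rip == UINT64_C(0x1008) && Cpu.offPc == 0);
    return 0;
}
#endif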
5925
5926
5927/*********************************************************************************************************************************
5928* Code Emitters (larger snippets) *
5929*********************************************************************************************************************************/
5930
5931/**
5932 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5933 * extending to 64-bit width.
5934 *
5935 * @returns New code buffer offset on success, UINT32_MAX on failure.
5936 * @param pReNative The native recompile state.
5937 * @param off The current code buffer position.
5938 * @param idxHstReg The host register to load the guest register value into.
5939 * @param enmGstReg The guest register to load.
5940 *
5941 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5942 * that is something the caller needs to do if applicable.
5943 */
5944DECL_HIDDEN_THROW(uint32_t)
5945iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5946{
5947 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5948 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5949
5950 switch (g_aGstShadowInfo[enmGstReg].cb)
5951 {
5952 case sizeof(uint64_t):
5953 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5954 case sizeof(uint32_t):
5955 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5956 case sizeof(uint16_t):
5957 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5958#if 0 /* not present in the table. */
5959 case sizeof(uint8_t):
5960 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5961#endif
5962 default:
5963 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5964 }
5965}
5966
5967
5968#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5969/**
5970 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5971 *
5972 * @returns New code buffer offset on success, UINT32_MAX on failure.
5973 * @param pReNative The recompiler state.
5974 * @param off The current code buffer position.
5975 * @param idxHstSimdReg The host register to load the guest register value into.
5976 * @param enmGstSimdReg The guest register to load.
5977 * @param enmLoadSz The load size of the register.
5978 *
5979 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5980 * that is something the caller needs to do if applicable.
5981 */
5982DECL_HIDDEN_THROW(uint32_t)
5983iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5984 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5985{
5986 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5987
5988 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5989 switch (enmLoadSz)
5990 {
5991 case kIemNativeGstSimdRegLdStSz_256:
5992 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5993 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5994 case kIemNativeGstSimdRegLdStSz_Low128:
5995 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5996 case kIemNativeGstSimdRegLdStSz_High128:
5997 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5998 default:
5999 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6000 }
6001}
6002#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6003
6004#ifdef VBOX_STRICT
6005
6006/**
6007 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6008 *
6009 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6010 * Trashes EFLAGS on AMD64.
6011 */
6012DECL_HIDDEN_THROW(uint32_t)
6013iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6014{
6015# ifdef RT_ARCH_AMD64
6016 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6017
6018 /* rol reg64, 32 */
6019 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6020 pbCodeBuf[off++] = 0xc1;
6021 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6022 pbCodeBuf[off++] = 32;
6023
6024 /* test reg32, ffffffffh */
6025 if (idxReg >= 8)
6026 pbCodeBuf[off++] = X86_OP_REX_B;
6027 pbCodeBuf[off++] = 0xf7;
6028 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6029 pbCodeBuf[off++] = 0xff;
6030 pbCodeBuf[off++] = 0xff;
6031 pbCodeBuf[off++] = 0xff;
6032 pbCodeBuf[off++] = 0xff;
6033
6034 /* je/jz +1 */
6035 pbCodeBuf[off++] = 0x74;
6036 pbCodeBuf[off++] = 0x01;
6037
6038 /* int3 */
6039 pbCodeBuf[off++] = 0xcc;
6040
6041 /* rol reg64, 32 */
6042 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6043 pbCodeBuf[off++] = 0xc1;
6044 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6045 pbCodeBuf[off++] = 32;
6046
6047# elif defined(RT_ARCH_ARM64)
6048 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6049 /* lsr tmp0, reg64, #32 */
6050 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6051 /* cbz tmp0, +1 */
6052 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6053 /* brk #0x1100 */
6054 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6055
6056# else
6057# error "Port me!"
6058# endif
6059 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6060 return off;
6061}
6062
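#if 0 /* Example, never compiled. */
/*
 * Illustrative only: the runtime condition the emitted strict check above enforces,
 * expressed as plain C with hypothetical helper names.
 */
# include <stdint.h>
# include <assert.h>

static void exampleAssertTop32BitsClear(uint64_t uReg)
{
    /* The generated code traps (int3 / brk) if any of the top 32 bits are set. */
    assert((uReg >> 32) == 0);
}

static void exampleAssertTop32BitsClearRolStyle(uint64_t uReg)
{
    /* The AMD64 sequence rotates the register by 32 and tests the (now) low dword,
       which is equivalent to testing the original high dword. */
    uint64_t const uRolled = (uReg << 32) | (uReg >> 32);
    assert((uint32_t)uRolled == 0);
}
#endif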
6063
6064/**
6065 * Emitting code that checks that the content of register @a idxReg is the same
6066 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6067 * instruction if that's not the case.
6068 *
6069 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6070 * Trashes EFLAGS on AMD64.
6071 */
6072DECL_HIDDEN_THROW(uint32_t)
6073iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6074{
6075#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6076 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6077 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
6078 return off;
6079#endif
6080
6081# ifdef RT_ARCH_AMD64
6082 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6083
6084 /* cmp reg, [mem] */
6085 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6086 {
6087 if (idxReg >= 8)
6088 pbCodeBuf[off++] = X86_OP_REX_R;
6089 pbCodeBuf[off++] = 0x38;
6090 }
6091 else
6092 {
6093 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6094 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6095 else
6096 {
6097 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6098 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6099 else
6100 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6101 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6102 if (idxReg >= 8)
6103 pbCodeBuf[off++] = X86_OP_REX_R;
6104 }
6105 pbCodeBuf[off++] = 0x39;
6106 }
6107 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6108
6109 /* je/jz +1 */
6110 pbCodeBuf[off++] = 0x74;
6111 pbCodeBuf[off++] = 0x01;
6112
6113 /* int3 */
6114 pbCodeBuf[off++] = 0xcc;
6115
6116 /* For values smaller than the register size, we must check that the rest
6117 of the register is all zeros. */
6118 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6119 {
6120 /* test reg64, imm32 */
6121 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6122 pbCodeBuf[off++] = 0xf7;
6123 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6124 pbCodeBuf[off++] = 0;
6125 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6126 pbCodeBuf[off++] = 0xff;
6127 pbCodeBuf[off++] = 0xff;
6128
6129 /* je/jz +1 */
6130 pbCodeBuf[off++] = 0x74;
6131 pbCodeBuf[off++] = 0x01;
6132
6133 /* int3 */
6134 pbCodeBuf[off++] = 0xcc;
6135 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6136 }
6137 else
6138 {
6139 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6140 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6141 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6142 }
6143
6144# elif defined(RT_ARCH_ARM64)
6145 /* mov TMP0, [gstreg] */
6146 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6147
6148 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6149 /* sub tmp0, tmp0, idxReg */
6150 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6151 /* cbz tmp0, +1 */
6152 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6153 /* brk #0x1000+enmGstReg */
6154 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6155 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6156
6157# else
6158# error "Port me!"
6159# endif
6160 return off;
6161}
6162
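#if 0 /* Example, never compiled. */
/*
 * Illustrative only: the runtime condition the emitted strict check above enforces, as
 * plain C.  cbGstReg stands in for g_aGstShadowInfo[enmGstReg].cb and the helper name is
 * hypothetical; the real code compares against CPUMCTX and traps on mismatch.
 */
# include <stdint.h>
# include <assert.h>

static void exampleAssertShadowMatches(uint64_t uHstReg, uint64_t uCtxValue, unsigned cbGstReg)
{
    uint64_t const fMask = cbGstReg >= 8 ? UINT64_MAX : (UINT64_C(1) << (cbGstReg * 8)) - 1;
    /* The low cbGstReg bytes of the shadow must match the CPUMCTX value ... */
    assert((uHstReg & fMask) == (uCtxValue & fMask));
    /* ... and the remaining upper bits of the shadow register must be zero. */
    assert((uHstReg & ~fMask) == 0);
}
#endif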
6163
6164# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6165# ifdef RT_ARCH_AMD64
6166/**
6167 * Helper for AMD64 to emit code that checks the low 128 bits of the given SIMD register against the given vCPU offset.
6168 */
6169DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6170{
6171 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6172 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6173 if (idxSimdReg >= 8)
6174 pbCodeBuf[off++] = X86_OP_REX_R;
6175 pbCodeBuf[off++] = 0x0f;
6176 pbCodeBuf[off++] = 0x38;
6177 pbCodeBuf[off++] = 0x29;
6178 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6179
6180 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6181 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6182 pbCodeBuf[off++] = X86_OP_REX_W
6183 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6184 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6185 pbCodeBuf[off++] = 0x0f;
6186 pbCodeBuf[off++] = 0x3a;
6187 pbCodeBuf[off++] = 0x16;
6188 pbCodeBuf[off++] = 0xeb;
6189 pbCodeBuf[off++] = 0x00;
6190
6191 /* cmp tmp0, 0xffffffffffffffff. */
6192 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6193 pbCodeBuf[off++] = 0x83;
6194 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6195 pbCodeBuf[off++] = 0xff;
6196
6197 /* je/jz +1 */
6198 pbCodeBuf[off++] = 0x74;
6199 pbCodeBuf[off++] = 0x01;
6200
6201 /* int3 */
6202 pbCodeBuf[off++] = 0xcc;
6203
6204 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6205 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6206 pbCodeBuf[off++] = X86_OP_REX_W
6207 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6208 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6209 pbCodeBuf[off++] = 0x0f;
6210 pbCodeBuf[off++] = 0x3a;
6211 pbCodeBuf[off++] = 0x16;
6212 pbCodeBuf[off++] = 0xeb;
6213 pbCodeBuf[off++] = 0x01;
6214
6215 /* cmp tmp0, 0xffffffffffffffff. */
6216 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6217 pbCodeBuf[off++] = 0x83;
6218 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6219 pbCodeBuf[off++] = 0xff;
6220
6221 /* je/jz +1 */
6222 pbCodeBuf[off++] = 0x74;
6223 pbCodeBuf[off++] = 0x01;
6224
6225 /* int3 */
6226 pbCodeBuf[off++] = 0xcc;
6227
6228 return off;
6229}
6230# endif
6231
6232
6233/**
6234 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6235 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6236 * instruction if that's not the case.
6237 *
6238 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6239 * Trashes EFLAGS on AMD64.
6240 */
6241DECL_HIDDEN_THROW(uint32_t)
6242iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6243 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6244{
6245 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6246 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6247 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6248 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6249 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6250 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6251 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6252 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6253 return off;
6254
6255# ifdef RT_ARCH_AMD64
6256 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6257 {
6258 /* movdqa vectmp0, idxSimdReg */
6259 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6260
6261 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6262
6263 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6264 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6265 }
6266
6267 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6268 {
6269 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6270 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6271
6272 /* vextracti128 vectmp0, idxSimdReg, 1 */
6273 pbCodeBuf[off++] = X86_OP_VEX3;
6274 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6275 | X86_OP_VEX3_BYTE1_X
6276 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6277 | 0x03; /* Opcode map */
6278 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6279 pbCodeBuf[off++] = 0x39;
6280 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6281 pbCodeBuf[off++] = 0x01;
6282
6283 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6284 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6285 }
6286# elif defined(RT_ARCH_ARM64)
6287 /* mov vectmp0, [gstreg] */
6288 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6289
6290 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6291 {
6292 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6293 /* eor vectmp0, vectmp0, idxSimdReg */
6294 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6295 /* uaddlv vectmp0, vectmp0.16B */
6296 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6297 /* umov tmp0, vectmp0.H[0] */
6298 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6299 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6300 /* cbz tmp0, +1 */
6301 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6302 /* brk #0x1000+enmGstSimdReg */
6303 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6304 }
6305
6306 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6307 {
6308 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6309 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6310 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6311 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6312 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6313 /* umov tmp0, (vectmp0 + 1).H[0] */
6314 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6315 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6316 /* cbz tmp0, +1 */
6317 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6318 /* brk #0x1000+enmGstSimdReg */
6319 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6320 }
6321
6322# else
6323# error "Port me!"
6324# endif
6325
6326 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6327 return off;
6328}
6329# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6330
6331
6332/**
6333 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6334 * important bits.
6335 *
6336 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6337 * Trashes EFLAGS on AMD64.
6338 */
6339DECL_HIDDEN_THROW(uint32_t)
6340iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6341{
6342 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6343 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6344 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6345 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6346
6347#ifdef RT_ARCH_AMD64
6348 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6349
6350 /* je/jz +1 */
6351 pbCodeBuf[off++] = 0x74;
6352 pbCodeBuf[off++] = 0x01;
6353
6354 /* int3 */
6355 pbCodeBuf[off++] = 0xcc;
6356
6357# elif defined(RT_ARCH_ARM64)
6358 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6359
6360 /* b.eq +1 */
6361 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6362 /* brk #0x2000 */
6363 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6364
6365# else
6366# error "Port me!"
6367# endif
6368 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6369
6370 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6371 return off;
6372}
6373
6374#endif /* VBOX_STRICT */
6375
6376
6377#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6378/**
6379 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6380 */
6381DECL_HIDDEN_THROW(uint32_t)
6382iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6383{
6384 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6385
6386 fEflNeeded &= X86_EFL_STATUS_BITS;
6387 if (fEflNeeded)
6388 {
6389# ifdef RT_ARCH_AMD64
6390 /* test dword [pVCpu + offVCpu], imm32 */
6391 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
6392 if (fEflNeeded <= 0xff)
6393 {
6394 pCodeBuf[off++] = 0xf6;
6395 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6396 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6397 }
6398 else
6399 {
6400 pCodeBuf[off++] = 0xf7;
6401 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6402 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6403 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6404 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6405 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6406 }
6407
6408 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 3, kIemNativeInstrCond_e);
6409 pCodeBuf[off++] = 0xcc;
6410
6411 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6412
6413# else
6414 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6415 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6416 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6417# ifdef RT_ARCH_ARM64
6418 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6419 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6420# else
6421# error "Port me!"
6422# endif
6423 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6424# endif
6425 }
6426 return off;
6427}
6428#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
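
#if 0 /* Example, never compiled. */
/*
 * Illustrative only: the condition the emitted EFLAGS-skipping check above enforces, as
 * plain C.  fSkippingEFlags mirrors IEMCPU::fSkippingEFlags; the helper name is hypothetical.
 */
# include <stdint.h>
# include <assert.h>

static void exampleAssertEflagsNotSkipped(uint32_t fSkippingEFlags, uint32_t fEflNeeded)
{
    /* If any status flag the following code relies on was skipped during recompilation,
       the generated code traps (int3 / brk); this is the same condition. */
    assert((fSkippingEFlags & fEflNeeded) == 0);
}
#endif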
6429
6430
6431/**
6432 * Emits code for checking the return code of a call and rcPassUp, returning
6433 * from the code if either is non-zero.
6434 */
6435DECL_HIDDEN_THROW(uint32_t)
6436iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6437{
6438#ifdef RT_ARCH_AMD64
6439 /*
6440 * AMD64: eax = call status code.
6441 */
6442
6443 /* edx = rcPassUp */
6444 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6445# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6446 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6447# endif
6448
6449 /* edx = eax | rcPassUp */
6450 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6451 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6452 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6453 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6454
6455 /* Jump to non-zero status return path. */
6456 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6457
6458 /* done. */
6459
6460#elif RT_ARCH_ARM64
6461 /*
6462 * ARM64: w0 = call status code.
6463 */
6464# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6465 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6466# endif
6467 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6468
6469 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6470
6471 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6472
6473 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pu32CodeBuf, off, ARMV8_A64_REG_X4, true /*f64Bit*/,
6474 kIemNativeLabelType_NonZeroRetOrPassUp);
6475
6476#else
6477# error "port me"
6478#endif
6479 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6480 RT_NOREF_PV(idxInstr);
6481 return off;
6482}
6483
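#if 0 /* Example, never compiled. */
/*
 * Illustrative only: the status-check idea realised by the emitted code above, written as
 * plain C with hypothetical names.  The generated code ORs the call status with
 * IEMCPU::rcPassUp and takes the NonZeroRetOrPassUp exit if the result is non-zero.
 */
# include <stdint.h>
# include <stdbool.h>

static bool exampleMustLeaveTb(int32_t rcCall, int32_t rcPassUp)
{
    /* Both values are VINF_SUCCESS (0) in the common case, so a single OR and one
       conditional branch covers "either is non-zero". */
    return (rcCall | rcPassUp) != 0;
}
#endif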
6484
6485/**
6486 * Emits a call to a CImpl function or something similar.
6487 */
6488DECL_HIDDEN_THROW(uint32_t)
6489iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6490 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6491{
6492 /* Writeback everything. */
6493 off = iemNativeRegFlushPendingWrites(pReNative, off);
6494
6495 /*
6496 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6497 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6498 */
6499 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6500 fGstShwFlush
6501 | RT_BIT_64(kIemNativeGstReg_Pc)
6502 | RT_BIT_64(kIemNativeGstReg_EFlags));
6503 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6504
6505 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6506
6507 /*
6508 * Load the parameters.
6509 */
6510#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6511 /* Special case: the hidden VBOXSTRICTRC return pointer takes the first argument register. */
6512 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6513 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6514 if (cAddParams > 0)
6515 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6516 if (cAddParams > 1)
6517 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6518 if (cAddParams > 2)
6519 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6520 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6521
6522#else
6523 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6524 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6525 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6526 if (cAddParams > 0)
6527 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6528 if (cAddParams > 1)
6529 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6530 if (cAddParams > 2)
6531# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6532 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6533# else
6534 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6535# endif
6536#endif
6537
6538 /*
6539 * Make the call.
6540 */
6541 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6542
6543#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6544 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6545#endif
6546
6547#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6548 pReNative->Core.fDebugPcInitialized = false;
6549 Log4(("fDebugPcInitialized=false cimpl off=%#x (v2)\n", off));
6550#endif
6551
6552 /*
6553 * Check the status code.
6554 */
6555 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6556}
6557
6558
6559/**
6560 * Emits a call to a threaded worker function.
6561 */
6562DECL_HIDDEN_THROW(uint32_t)
6563iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6564{
6565 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6566
6567 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6568 off = iemNativeRegFlushPendingWrites(pReNative, off);
6569
6570 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6571 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6572
6573#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6574 /* The threaded function may throw / long jmp, so set the current instruction
6575 number if we're counting. */
6576 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6577#endif
6578
6579 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6580
6581#ifdef RT_ARCH_AMD64
6582 /* Load the parameters and emit the call. */
6583# ifdef RT_OS_WINDOWS
6584# ifndef VBOXSTRICTRC_STRICT_ENABLED
6585 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6586 if (cParams > 0)
6587 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6588 if (cParams > 1)
6589 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6590 if (cParams > 2)
6591 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6592# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6593 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6594 if (cParams > 0)
6595 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6596 if (cParams > 1)
6597 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6598 if (cParams > 2)
6599 {
6600 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6601 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6602 }
6603 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6604# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6605# else
6606 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6607 if (cParams > 0)
6608 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6609 if (cParams > 1)
6610 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6611 if (cParams > 2)
6612 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6613# endif
6614
6615 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6616
6617# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6618 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6619# endif
6620
6621#elif RT_ARCH_ARM64
6622 /*
6623 * ARM64:
6624 */
6625 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6626 if (cParams > 0)
6627 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6628 if (cParams > 1)
6629 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6630 if (cParams > 2)
6631 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6632
6633 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6634
6635#else
6636# error "port me"
6637#endif
6638
6639#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6640 pReNative->Core.fDebugPcInitialized = false;
6641 Log4(("fDebugPcInitialized=false todo off=%#x (v2)\n", off));
6642#endif
6643
6644 /*
6645 * Check the status code.
6646 */
6647 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6648
6649 return off;
6650}
6651
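#if 0 /* Example, never compiled. */
/*
 * Illustrative only: the call the emitted code above performs, expressed as plain C with
 * hypothetical types.  The real worker pointers live in g_apfnIemThreadedFunctions and the
 * emitter loads pVCpu plus up to three parameters into the ABI argument registers.
 */
# include <stdint.h>

typedef struct EXAMPLEVCPU EXAMPLEVCPU;
typedef int (*PFNEXAMPLETHREADEDFUNC)(EXAMPLEVCPU *pVCpu, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2);

static int exampleInvokeThreadedFunc(PFNEXAMPLETHREADEDFUNC pfnWorker, EXAMPLEVCPU *pVCpu, uint64_t const auParams[3])
{
    /* Parameters the worker does not use are simply never loaded by the emitter;
       passing them here is harmless for the illustration. */
    return pfnWorker(pVCpu, auParams[0], auParams[1], auParams[2]);
}
#endif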
6652
6653/**
6654 * The default liveness function, matching iemNativeEmitThreadedCall.
6655 */
6656IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_ThreadedCall)
6657{
6658 IEM_LIVENESS_RAW_INIT_WITH_CALL(pOutgoing, pIncoming);
6659 RT_NOREF(pCallEntry);
6660}
6661
6662#ifdef VBOX_WITH_STATISTICS
6663
6664/**
6665 * Emits code to update the thread call statistics.
6666 */
6667DECL_INLINE_THROW(uint32_t)
6668iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6669{
6670 /*
6671 * Update threaded function stats.
6672 */
6673 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6674 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6675# if defined(RT_ARCH_ARM64)
6676 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6677 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6678 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6679 iemNativeRegFreeTmp(pReNative, idxTmp1);
6680 iemNativeRegFreeTmp(pReNative, idxTmp2);
6681# else
6682 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6683# endif
6684 return off;
6685}
6686
6687
6688/**
6689 * Emits code to update the TB exit reason statistics.
6690 */
6691DECL_INLINE_THROW(uint32_t)
6692iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6693{
6694 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6695 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6696 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6697 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6698 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6699
6700 return off;
6701}
6702
6703#endif /* VBOX_WITH_STATISTICS */
6704
6705/**
6706 * Worker for iemNativeEmitViaLookupDoOne and iemNativeRecompileAttachExecMemChunkCtx.
6707 */
6708static uint32_t
6709iemNativeEmitCoreViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak, uintptr_t pfnHelper)
6710{
6711 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6712 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6713
6714 /* Jump to ReturnBreak if the return register is NULL. */
6715 off = iemNativeEmitTestIfGprIsZeroAndJmpToFixed(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6716 true /*f64Bit*/, offReturnBreak);
6717
6718 /* Okay, continue executing the next TB. */
6719 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6720 return off;
6721}
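
#if 0 /* Example, never compiled. */
/*
 * Illustrative only: the control flow produced by the emitter above, as plain C with
 * hypothetical types.  The real code tests the helper's return value and either jumps to
 * the ReturnBreak code or tail-jumps straight into the next TB's native code.
 */
# include <stddef.h>

typedef struct EXAMPLEVCPU EXAMPLEVCPU;
typedef void (*PFNEXAMPLETB)(EXAMPLEVCPU *pVCpu);

static void exampleViaLookup(EXAMPLEVCPU *pVCpu, PFNEXAMPLETB (*pfnLookup)(EXAMPLEVCPU *pVCpu))
{
    PFNEXAMPLETB const pfnNextTb = pfnLookup(pVCpu); /* helper returns the next TB or NULL */
    if (!pfnNextTb)
        return;          /* corresponds to jumping to the ReturnBreak path */
    pfnNextTb(pVCpu);    /* corresponds to jumping directly into the next TB */
}
#endif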
6722
6723#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6724
6725/**
6726 * Worker for iemNativeEmitReturnBreakViaLookup.
6727 */
6728static uint32_t iemNativeEmitViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak,
6729 IEMNATIVELABELTYPE enmLabel, uintptr_t pfnHelper)
6730{
6731 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
6732 if (idxLabel != UINT32_MAX)
6733 {
6734 iemNativeLabelDefine(pReNative, idxLabel, off);
6735 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, pfnHelper);
6736 }
6737 return off;
6738}
6739
6740
6741/**
6742 * Emits the code at the ReturnBreakViaLookup, ReturnBreakViaLookupWithIrq,
6743 * ReturnBreakViaLookupWithTlb and ReturnBreakViaLookupWithTlbAndIrq labels
6744 * (returns VINF_IEM_REEXEC_BREAK or jumps to the next TB).
6745 */
6746static uint32_t iemNativeEmitReturnBreakViaLookup(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnBreakLabel)
6747{
6748 uint32_t const offReturnBreak = pReNative->paLabels[idxReturnBreakLabel].off;
6749 Assert(offReturnBreak < off);
6750
6751 /*
6752 * The lookup table index is in IEMNATIVE_CALL_ARG1_GREG for all.
6753 * The GCPhysPc is in IEMNATIVE_CALL_ARG2_GREG for ReturnBreakViaLookupWithPc.
6754 */
6755 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookup,
6756 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/>);
6757 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
6758 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/>);
6759 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
6760 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/>);
6761 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
6762 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/>);
6763 return off;
6764}
6765
6766#endif /* !IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
6767
6768/**
6769 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6770 */
6771static uint32_t iemNativeEmitCoreReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6772{
6773 /* set the return status */
6774 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6775}
6776
6777
6778#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6779/**
6780 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6781 */
6782static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6783{
6784 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6785 if (idxLabel != UINT32_MAX)
6786 {
6787 iemNativeLabelDefine(pReNative, idxLabel, off);
6788 /* set the return status */
6789 off = iemNativeEmitCoreReturnWithFlags(pReNative, off);
6790 /* jump back to the return sequence. */
6791 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6792 }
6793 return off;
6794}
6795#endif
6796
6797
6798/**
6799 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6800 */
6801static uint32_t iemNativeEmitCoreReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6802{
6803 /* set the return status */
6804 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6805}
6806
6807
6808#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6809/**
6810 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6811 */
6812static uint32_t iemNativeEmitReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6813{
6814 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreakFF);
6815 if (idxLabel != UINT32_MAX)
6816 {
6817 iemNativeLabelDefine(pReNative, idxLabel, off);
6818 /* set the return status */
6819 off = iemNativeEmitCoreReturnBreakFF(pReNative, off);
6820 /* jump back to the return sequence. */
6821 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6822 }
6823 return off;
6824}
6825#endif
6826
6827
6828/**
6829 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6830 */
6831static uint32_t iemNativeEmitCoreReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6832{
6833 /* set the return status */
6834 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6835}
6836
6837
6838#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6839/**
6840 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6841 */
6842static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6843{
6844 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6845 if (idxLabel != UINT32_MAX)
6846 {
6847 iemNativeLabelDefine(pReNative, idxLabel, off);
6848 /* set the return status */
6849 off = iemNativeEmitCoreReturnBreak(pReNative, off);
6850 /* jump back to the return sequence. */
6851 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6852 }
6853 return off;
6854}
6855#endif
6856
6857
6858/**
6859 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6860 */
6861static uint32_t iemNativeEmitCoreRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6862{
6863 /*
6864 * Generate the rc + rcPassUp fiddling code.
6865 */
6866 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6867#ifdef RT_ARCH_AMD64
6868# ifdef RT_OS_WINDOWS
6869# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6870 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6871# endif
6872 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6873 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6874# else
6875 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6876 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6877# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6878 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6879# endif
6880# endif
6881# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6882 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6883# endif
6884
6885#else
6886 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6887 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6888 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6889#endif
6890
6891 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6892 return off;
6893}
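/* For reference, a summary of the argument mapping produced by the emitter above
   for the iemNativeHlpExecStatusCodeFiddling(pVCpu, rc, idxInstr) call. This is
   derived from the moves emitted there and the standard calling conventions; the
   last row uses the generic IEMNATIVE_CALL_ARGx_GREG aliases:

                        pVCpu                       rc                          idxInstr
     AMD64, Windows:    rcx                         rdx                         r8
     AMD64, SysV:       rdi                         rsi                         rdx
     other (arm64):     IEMNATIVE_CALL_ARG0_GREG    IEMNATIVE_CALL_ARG1_GREG    IEMNATIVE_CALL_ARG2_GREG (already set)
*/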
6894
6895
6896#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6897/**
6898 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6899 */
6900static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6901{
6902 /*
6903 * Generate the rc + rcPassUp fiddling code if needed.
6904 */
6905 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6906 if (idxLabel != UINT32_MAX)
6907 {
6908 iemNativeLabelDefine(pReNative, idxLabel, off);
6909 off = iemNativeEmitCoreRcFiddling(pReNative, off);
6910 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6911 }
6912 return off;
6913}
6914#endif
6915
6916
6917/**
6918 * Emits a standard epilog.
6919 */
6920static uint32_t iemNativeEmitCoreEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6921{
6922 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6923
6924 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6925
6926 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6927 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6928
6929 /*
6930 * Restore registers and return.
6931 */
6932#ifdef RT_ARCH_AMD64
6933 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6934
6935 /* Reposition esp at the r15 restore point. */
6936 pbCodeBuf[off++] = X86_OP_REX_W;
6937 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6938 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6939 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6940
6941 /* Pop non-volatile registers and return */
6942 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6943 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6944 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6945 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6946 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6947 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6948 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6949 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6950# ifdef RT_OS_WINDOWS
6951 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6952 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6953# endif
6954 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6955 pbCodeBuf[off++] = 0xc9; /* leave */
6956 pbCodeBuf[off++] = 0xc3; /* ret */
6957 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6958
6959#elif RT_ARCH_ARM64
6960 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6961
6962 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6963 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6964 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6965 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6966 IEMNATIVE_FRAME_VAR_SIZE / 8);
6967 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6968 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6969 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6970 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6971 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6972 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6973 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6974 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6975 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6976 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6977 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6978 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6979
6980 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6981 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6982 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6983 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6984
6985 /* retab / ret */
6986# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6987 if (1)
6988 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6989 else
6990# endif
6991 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6992
6993#else
6994# error "port me"
6995#endif
6996 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6997
6998 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
6999 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
7000
7001 return off;
7002}
7003
7004
7005#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
7006/**
7007 * Emits a standard epilog.
7008 */
7009static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7010{
7011 /*
7012 * Define label for common return point.
7013 */
7014 *pidxReturnLabel = UINT32_MAX;
7015 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7016 *pidxReturnLabel = idxReturn;
7017
7018 /*
7019 * Emit the code.
7020 */
7021 return iemNativeEmitCoreEpilog(pReNative, off);
7022}
7023#endif
7024
7025
7026#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
7027/**
7028 * Emits a standard prolog.
7029 */
7030static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7031{
7032#ifdef RT_ARCH_AMD64
7033 /*
7034 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7035 * reserving 64 bytes for stack variables plus 4 non-register argument
7036 * slots. Fixed register assignment: xBX = pVCpu (IEMNATIVE_REG_FIXED_PVMCPU).
7037 *
7038 * Since we always do the same register spilling, we can use the same
7039 * unwind description for all the code.
7040 */
7041 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7042 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7043 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7044 pbCodeBuf[off++] = 0x8b;
7045 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7046 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7047 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7048# ifdef RT_OS_WINDOWS
7049 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7050 pbCodeBuf[off++] = 0x8b;
7051 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7052 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7053 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7054# else
7055 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7056 pbCodeBuf[off++] = 0x8b;
7057 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7058# endif
7059 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7060 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7061 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7062 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7063 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7064 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7065 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7066 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7067
7068# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7069 /* Save the frame pointer. */
7070 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7071# endif
7072
7073 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7074 X86_GREG_xSP,
7075 IEMNATIVE_FRAME_ALIGN_SIZE
7076 + IEMNATIVE_FRAME_VAR_SIZE
7077 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7078 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7079 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7080 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7081 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7082
7083#elif RT_ARCH_ARM64
7084 /*
7085 * We set up a stack frame exactly like on x86, only we have to push the
7086 * return address ourselves here. We save all non-volatile registers.
7087 */
7088 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7089
7090# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further; we've been unable
7091 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7092 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
7093 * in any way conditional, so we just emit this instruction now and hope for the best... */
7094 /* pacibsp */
7095 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7096# endif
7097
7098 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7099 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7100 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7101 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7102 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7103 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7104 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7105 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7106 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7107 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7108 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7109 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7110 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7111 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7112 /* Save the BP and LR (ret address) registers at the top of the frame. */
7113 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7114 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7115 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7116 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7117 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7118 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7119
7120 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7121 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7122
7123 /* mov r28, r0 */
7124 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7125 /* mov r27, r1 */
7126 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7127
7128# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7129 /* Save the frame pointer. */
7130 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7131 ARMV8_A64_REG_X2);
7132# endif
7133
7134#else
7135# error "port me"
7136#endif
7137 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7138 return off;
7139}
7140#endif
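/* Resulting arm64 register save area layout, as implied by the stp sequence in the
   prolog above (offsets relative to SP right after the first, pre-indexed stp;
   IEMNATIVE_FRAME_SAVE_REG_SIZE is asserted to be 12*8 = 96 bytes):

     SP +  0: x19, x20
     SP + 16: x21, x22
     SP + 32: x23, x24
     SP + 48: x25, x26
     SP + 64: x27, x28
     SP + 80: BP (x29), LR (x30)   <- BP is then pointed here (SP + save size - 16)

   The epilog earlier in this file reloads the same pairs from the same offsets. */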
7141
7142
7143/*********************************************************************************************************************************
7144* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7145*********************************************************************************************************************************/
7146
7147/**
7148 * Internal worker that allocates a variable with kind set to
7149 * kIemNativeVarKind_Invalid and no current stack allocation.
7150 *
7151 * The kind will either be set by the caller or later when the variable is first
7152 * assigned a value.
7153 *
7154 * @returns Unpacked index.
7155 * @internal
7156 */
7157static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7158{
7159 Assert(cbType > 0 && cbType <= 64);
7160 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7161 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7162 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7163 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7164 pReNative->Core.aVars[idxVar].cbVar = cbType;
7165 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7166 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7167 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7168 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7169 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7170 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7171 pReNative->Core.aVars[idxVar].u.uValue = 0;
7172#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7173 pReNative->Core.aVars[idxVar].fSimdReg = false;
7174#endif
7175 return idxVar;
7176}
7177
7178
7179/**
7180 * Internal worker that allocates an argument variable w/o setting enmKind.
7181 *
7182 * @returns Unpacked index.
7183 * @internal
7184 */
7185static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7186{
7187 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7188 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7189 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7190
7191 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7192 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7193 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7194 return idxVar;
7195}
7196
7197
7198/**
7199 * Gets the stack slot for a stack variable, allocating one if necessary.
7200 *
7201 * Calling this function implies that the stack slot will contain a valid
7202 * variable value. The caller deals with any register currently assigned to the
7203 * variable, typically by spilling it into the stack slot.
7204 *
7205 * @returns The stack slot number.
7206 * @param pReNative The recompiler state.
7207 * @param idxVar The variable.
7208 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7209 */
7210DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7211{
7212 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7213 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7214 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7215
7216 /* Already got a slot? */
7217 uint8_t const idxStackSlot = pVar->idxStackSlot;
7218 if (idxStackSlot != UINT8_MAX)
7219 {
7220 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7221 return idxStackSlot;
7222 }
7223
7224 /*
7225 * A single slot is easy to allocate.
7226 * Allocate them from the top end, closest to BP, to reduce the displacement.
7227 */
7228 if (pVar->cbVar <= sizeof(uint64_t))
7229 {
7230 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7231 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7232 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7233 pVar->idxStackSlot = (uint8_t)iSlot;
7234 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7235 return (uint8_t)iSlot;
7236 }
7237
7238 /*
7239 * We need more than one stack slot.
7240 *
7241 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7242 */
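 /* Worked example (illustrative only): for a hypothetical 32-byte variable the
    two masks below come out as

        fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = RT_BIT_32(2) - 1 = 0x3
        fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1            = RT_BIT_32(4) - 1 = 0xf

    so the search only probes slot indexes that are multiples of 4 and claims four
    consecutive slots once it finds a free, suitably aligned run. */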
7243 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7244 Assert(pVar->cbVar <= 64);
7245 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7246 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7247 uint32_t bmStack = pReNative->Core.bmStack;
7248 while (bmStack != UINT32_MAX)
7249 {
7250 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7251 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7252 iSlot = (iSlot - 1) & ~fBitAlignMask;
7253 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7254 {
7255 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7256 pVar->idxStackSlot = (uint8_t)iSlot;
7257 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7258 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7259 return (uint8_t)iSlot;
7260 }
7261
7262 bmStack |= (fBitAllocMask << iSlot);
7263 }
7264 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7265}
7266
7267
7268/**
7269 * Changes the variable to a stack variable.
7270 *
7271 * Currently this is only possible to do the first time the variable is used;
7272 * switching later can be implemented but hasn't been done.
7273 *
7274 * @param pReNative The recompiler state.
7275 * @param idxVar The variable.
7276 * @throws VERR_IEM_VAR_IPE_2
7277 */
7278DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7279{
7280 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7281 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7282 if (pVar->enmKind != kIemNativeVarKind_Stack)
7283 {
7284 /* We could in theory transition from immediate to stack as well, but it
7285 would involve the caller doing work storing the value on the stack. So,
7286 till that's required we only allow transition from invalid. */
7287 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7288 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7289 pVar->enmKind = kIemNativeVarKind_Stack;
7290
7291 /* Note! We don't allocate a stack slot here, that's only done when a
7292 slot is actually needed to hold a variable value. */
7293 }
7294}
7295
7296
7297/**
7298 * Sets the variable to a constant (immediate) value.
7299 *
7300 * This does not require stack storage as we know the value and can always
7301 * reload it, unless of course it's referenced.
7302 *
7303 * @param pReNative The recompiler state.
7304 * @param idxVar The variable.
7305 * @param uValue The immediate value.
7306 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7307 */
7308DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7309{
7310 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7311 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7312 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7313 {
7314 /* Only simple transitions for now. */
7315 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7316 pVar->enmKind = kIemNativeVarKind_Immediate;
7317 }
7318 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7319
7320 pVar->u.uValue = uValue;
7321 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7322 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7323 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7324}
7325
7326
7327/**
7328 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7329 *
7330 * This does not require stack storage as we know the value and can always
7331 * reload it. Loading is postponed till needed.
7332 *
7333 * @param pReNative The recompiler state.
7334 * @param idxVar The variable. Unpacked.
7335 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7336 *
7337 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7338 * @internal
7339 */
7340static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7341{
7342 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7343 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7344
7345 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7346 {
7347 /* Only simple transitions for now. */
7348 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7349 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7350 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7351 }
7352 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7353
7354 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7355
7356 /* Update the other variable, ensure it's a stack variable. */
7357 /** @todo handle variables with const values... that'll go boom now. */
7358 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7359 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7360}
7361
7362
7363/**
7364 * Sets the variable to a reference (pointer) to a guest register reference.
7365 *
7366 * This does not require stack storage as we know the value and can always
7367 * reload it. Loading is postponed till needed.
7368 *
7369 * @param pReNative The recompiler state.
7370 * @param idxVar The variable.
7371 * @param enmRegClass The class guest registers to reference.
7372 * @param idxReg The register within @a enmRegClass to reference.
7373 *
7374 * @throws VERR_IEM_VAR_IPE_2
7375 */
7376DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7377 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7378{
7379 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7380 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7381
7382 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7383 {
7384 /* Only simple transitions for now. */
7385 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7386 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7387 }
7388 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7389
7390 pVar->u.GstRegRef.enmClass = enmRegClass;
7391 pVar->u.GstRegRef.idx = idxReg;
7392}
7393
7394
7395DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7396{
7397 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7398}
7399
7400
7401DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7402{
7403 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7404
7405 /* Since we're using a generic uint64_t value type, we must truncate it if
7406 the variable is smaller, otherwise we may end up with a too large value when
7407 scaling up an imm8 w/ sign-extension.
7408
7409 This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7410 in the bios, bx=1) when running on arm, because clang expects 16-bit
7411 register parameters to have bits 16 and up set to zero. Instead of
7412 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7413 CF value in the result. */
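 /* Worked example (illustrative): for the "add bx, 0xffff" case above the
    threaded code hands us cbType = sizeof(uint16_t) together with the
    sign-extended immediate, i.e. a uValue with all the upper bits set; the
    switch below masks that down to 0x000000000000ffff, so bits 16 thru 63 of
    the 16-bit argument register end up cleared as expected by the callee. */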
7414 switch (cbType)
7415 {
7416 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7417 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7418 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7419 }
7420 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7421 return idxVar;
7422}
7423
7424
7425DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7426{
7427 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7428 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7429 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7430 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7431 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7432 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7433
7434 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7435 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7436 return idxArgVar;
7437}
7438
7439
7440DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7441{
7442 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7443 /* Don't set to stack now, leave that to the first use as for instance
7444 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7445 return idxVar;
7446}
7447
7448
7449DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7450{
7451 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7452
7453 /* Since we're using a generic uint64_t value type, we must truncate it if
7454 the variable is smaller, otherwise we may end up with a too large value when
7455 scaling up an imm8 w/ sign-extension. */
7456 switch (cbType)
7457 {
7458 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7459 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7460 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7461 }
7462 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7463 return idxVar;
7464}
7465
7466
7467DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
7468 uint8_t cbType, uint8_t idxVarOther)
7469{
7470 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7471 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7472
7473 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
7474 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7475
7476/** @todo combine MOV and AND using MOVZX/similar. */
7477 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7478
7479 /* Truncate the value to this variables size. */
7480 switch (cbType)
7481 {
7482 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7483 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7484 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7485 }
7486
7487 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7488 iemNativeVarRegisterRelease(pReNative, idxVar);
7489 return idxVar;
7490}
7491
7492
7493/**
7494 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7495 * fixed till we call iemNativeVarRegisterRelease.
7496 *
7497 * @returns The host register number.
7498 * @param pReNative The recompiler state.
7499 * @param idxVar The variable.
7500 * @param poff Pointer to the instruction buffer offset.
7501 * In case a register needs to be freed up or the value
7502 * loaded off the stack.
7503 * @param fInitialized Set if the variable must already have been initialized.
7504 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7505 * the case.
7506 * @param idxRegPref Preferred register number or UINT8_MAX.
7507 */
7508DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7509 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7510{
7511 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7512 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7513 Assert(pVar->cbVar <= 8);
7514 Assert(!pVar->fRegAcquired);
7515
7516 uint8_t idxReg = pVar->idxReg;
7517 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7518 {
7519 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7520 && pVar->enmKind < kIemNativeVarKind_End);
7521 pVar->fRegAcquired = true;
7522 return idxReg;
7523 }
7524
7525 /*
7526 * If the kind of variable has not yet been set, default to 'stack'.
7527 */
7528 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7529 && pVar->enmKind < kIemNativeVarKind_End);
7530 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7531 iemNativeVarSetKindToStack(pReNative, idxVar);
7532
7533 /*
7534 * We have to allocate a register for the variable, even if it's a stack one,
7535 * as we don't know if there are modifications being made to it before it's
7536 * finalized (todo: analyze and insert hints about that?).
7537 *
7538 * If we can, we try to get the correct register for argument variables. This
7539 * is assuming that most argument variables are fetched as close as possible
7540 * to the actual call, so that there aren't any interfering hidden calls
7541 * (memory accesses, etc) in between.
7542 *
7543 * If we cannot, or it's a plain (non-argument) variable, we make sure no
7544 * argument registers that will be used by this MC block are allocated here,
7545 * and we always prefer non-volatile registers to avoid needing to spill
7546 * stuff for internal calls.
7547 */
7548 /** @todo Detect too early argument value fetches and warn about hidden
7549 * calls causing less optimal code to be generated in the python script. */
7550
7551 uint8_t const uArgNo = pVar->uArgNo;
7552 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7553 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7554 {
7555 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7556
7557#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7558 /* Writeback any dirty shadow registers we are about to unshadow. */
7559 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7560#endif
7561
7562 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7563 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7564 }
7565 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7566 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7567 {
7568 /** @todo there must be a better way for this and boot cArgsX? */
7569 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7570 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7571 & ~pReNative->Core.bmHstRegsWithGstShadow
7572 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7573 & fNotArgsMask;
7574 if (fRegs)
7575 {
7576 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7577 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7578 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7579 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7580 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7581 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7582 }
7583 else
7584 {
7585 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7586 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7587 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7588 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7589 }
7590 }
7591 else
7592 {
7593 idxReg = idxRegPref;
7594 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7595 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7596 }
7597 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7598 pVar->idxReg = idxReg;
7599
7600#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7601 pVar->fSimdReg = false;
7602#endif
7603
7604 /*
7605 * Load it off the stack if we've got a stack slot.
7606 */
7607 uint8_t const idxStackSlot = pVar->idxStackSlot;
7608 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7609 {
7610 Assert(fInitialized);
7611 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7612 switch (pVar->cbVar)
7613 {
7614 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7615 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7616 case 3: AssertFailed(); RT_FALL_THRU();
7617 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7618 default: AssertFailed(); RT_FALL_THRU();
7619 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7620 }
7621 }
7622 else
7623 {
7624 Assert(idxStackSlot == UINT8_MAX);
7625 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7626 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7627 else
7628 {
7629 /*
7630 * Convert from immediate to stack/register. This is currently only
7631 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7632 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7633 */
7634 AssertStmt(fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7635 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7636 idxVar, idxReg, pVar->u.uValue));
7637 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7638 pVar->enmKind = kIemNativeVarKind_Stack;
7639 }
7640 }
7641
7642 pVar->fRegAcquired = true;
7643 return idxReg;
7644}
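/* Illustrative usage sketch (not compiled): the typical pattern for callers of the
   acquire/release pairing above, mirroring what iemNativeVarAllocAssign does; idxVar
   and idxOtherReg are placeholders for whatever the caller has at hand. */
#if 0
    uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxOtherReg, idxVarReg); /* use the register while it is pinned */
    iemNativeVarRegisterRelease(pReNative, idxVar);                            /* unpin it so the allocator may reuse/spill it */
#endif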
7645
7646
7647#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7648/**
7649 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7650 * fixed till we call iemNativeVarRegisterRelease.
7651 *
7652 * @returns The host register number.
7653 * @param pReNative The recompiler state.
7654 * @param idxVar The variable.
7655 * @param poff Pointer to the instruction buffer offset.
7656 * In case a register needs to be freed up or the value
7657 * loaded off the stack.
7658 * @param fInitialized Set if the variable must already have been initialized.
7659 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7660 * the case.
7661 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7662 */
7663DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7664 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7665{
7666 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7667 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7668 Assert( pVar->cbVar == sizeof(RTUINT128U)
7669 || pVar->cbVar == sizeof(RTUINT256U));
7670 Assert(!pVar->fRegAcquired);
7671
7672 uint8_t idxReg = pVar->idxReg;
7673 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7674 {
7675 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7676 && pVar->enmKind < kIemNativeVarKind_End);
7677 pVar->fRegAcquired = true;
7678 return idxReg;
7679 }
7680
7681 /*
7682 * If the kind of variable has not yet been set, default to 'stack'.
7683 */
7684 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7685 && pVar->enmKind < kIemNativeVarKind_End);
7686 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7687 iemNativeVarSetKindToStack(pReNative, idxVar);
7688
7689 /*
7690 * We have to allocate a register for the variable, even if it's a stack one,
7691 * as we don't know if there are modifications being made to it before it's
7692 * finalized (todo: analyze and insert hints about that?).
7693 *
7694 * If we can, we try to get the correct register for argument variables. This
7695 * is assuming that most argument variables are fetched as close as possible
7696 * to the actual call, so that there aren't any interfering hidden calls
7697 * (memory accesses, etc) in between.
7698 *
7699 * If we cannot, or it's a plain (non-argument) variable, we make sure no
7700 * argument registers that will be used by this MC block are allocated here,
7701 * and we always prefer non-volatile registers to avoid needing to spill
7702 * stuff for internal calls.
7703 */
7704 /** @todo Detect too early argument value fetches and warn about hidden
7705 * calls causing less optimal code to be generated in the python script. */
7706
7707 uint8_t const uArgNo = pVar->uArgNo;
7708 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7709
7710 /* SIMD is a bit simpler for now because there is no support for arguments. */
7711 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7712 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7713 {
7714 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7715 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7716 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7717 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7718 & fNotArgsMask;
7719 if (fRegs)
7720 {
7721 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7722 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7723 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7724 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7725 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7726 }
7727 else
7728 {
7729 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7730 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7731 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7732 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7733 }
7734 }
7735 else
7736 {
7737 idxReg = idxRegPref;
7738 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7739 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7740 }
7741 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7742
7743 pVar->fSimdReg = true;
7744 pVar->idxReg = idxReg;
7745
7746 /*
7747 * Load it off the stack if we've got a stack slot.
7748 */
7749 uint8_t const idxStackSlot = pVar->idxStackSlot;
7750 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7751 {
7752 Assert(fInitialized);
7753 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7754 switch (pVar->cbVar)
7755 {
7756 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7757 default: AssertFailed(); RT_FALL_THRU();
7758 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7759 }
7760 }
7761 else
7762 {
7763 Assert(idxStackSlot == UINT8_MAX);
7764 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7765 }
7766 pVar->fRegAcquired = true;
7767 return idxReg;
7768}
7769#endif
7770
7771
7772/**
7773 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7774 * guest register.
7775 *
7776 * This function makes sure there is a register for it and sets it to be the
7777 * current shadow copy of @a enmGstReg.
7778 *
7779 * @returns The host register number.
7780 * @param pReNative The recompiler state.
7781 * @param idxVar The variable.
7782 * @param enmGstReg The guest register this variable will be written to
7783 * after this call.
7784 * @param poff Pointer to the instruction buffer offset.
7785 * In case a register needs to be freed up or if the
7786 * variable content needs to be loaded off the stack.
7787 *
7788 * @note We DO NOT expect @a idxVar to be an argument variable,
7789 * because this function is only used in the commit stage of an
7790 * instruction.
7791 */
7792DECL_HIDDEN_THROW(uint8_t)
7793iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7794{
7795 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7796 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7797 Assert(!pVar->fRegAcquired);
7798 AssertMsgStmt( pVar->cbVar <= 8
7799 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7800 || pVar->enmKind == kIemNativeVarKind_Stack),
7801 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7802 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7803 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7804
7805 /*
7806 * This shouldn't ever be used for arguments, unless it's in a weird else
7807 * branch that doesn't do any calling and even then it's questionable.
7808 *
7809 * However, in case someone writes crazy wrong MC code and does register
7810 * updates before making calls, just use the regular register allocator to
7811 * ensure we get a register suitable for the intended argument number.
7812 */
7813 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7814
7815 /*
7816 * If there is already a register for the variable, we transfer/set the
7817 * guest shadow copy assignment to it.
7818 */
7819 uint8_t idxReg = pVar->idxReg;
7820 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7821 {
7822#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7823 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7824 {
7825# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7826 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7827 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7828# endif
7829 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7830 }
7831#endif
7832
7833 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7834 {
7835 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7836 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7837 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7838 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7839 }
7840 else
7841 {
7842 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7843 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7844 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7845 }
7846 /** @todo figure this one out. We need some way of making sure the register isn't
7847 * modified after this point, just in case we start writing crappy MC code. */
7848 pVar->enmGstReg = enmGstReg;
7849 pVar->fRegAcquired = true;
7850 return idxReg;
7851 }
7852 Assert(pVar->uArgNo == UINT8_MAX);
7853
7854 /*
7855 * Because this is supposed to be the commit stage, we just tag along with the
7856 * temporary register allocator and upgrade it to a variable register.
7857 */
7858 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7859 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7860 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7861 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7862 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7863 pVar->idxReg = idxReg;
7864
7865 /*
7866 * Now we need to load the register value.
7867 */
7868 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7869 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7870 else
7871 {
7872 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7873 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7874 switch (pVar->cbVar)
7875 {
7876 case sizeof(uint64_t):
7877 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7878 break;
7879 case sizeof(uint32_t):
7880 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7881 break;
7882 case sizeof(uint16_t):
7883 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7884 break;
7885 case sizeof(uint8_t):
7886 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7887 break;
7888 default:
7889 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7890 }
7891 }
7892
7893 pVar->fRegAcquired = true;
7894 return idxReg;
7895}
7896
7897
7898/**
7899 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7900 *
7901 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7902 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7903 * requirement of flushing anything in volatile host registers when making a
7904 * call.
7905 *
7906 * @returns New @a off value.
7907 * @param pReNative The recompiler state.
7908 * @param off The code buffer position.
7909 * @param fHstRegsNotToSave Set of registers not to save & restore.
7910 */
7911DECL_HIDDEN_THROW(uint32_t)
7912iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7913{
7914 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
7915 if (fHstRegs)
7916 {
7917 do
7918 {
7919 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7920 fHstRegs &= ~RT_BIT_32(idxHstReg);
7921
7922 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7923 {
7924 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7925 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7926 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7927 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7928 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7929 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7930 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7931 {
7932 case kIemNativeVarKind_Stack:
7933 {
7934 /* Temporarily spill the variable register. */
7935 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7936 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7937 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7938 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7939 continue;
7940 }
7941
7942 case kIemNativeVarKind_Immediate:
7943 case kIemNativeVarKind_VarRef:
7944 case kIemNativeVarKind_GstRegRef:
7945 /* It is weird to have any of these loaded at this point. */
7946 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7947 continue;
7948
7949 case kIemNativeVarKind_End:
7950 case kIemNativeVarKind_Invalid:
7951 break;
7952 }
7953 AssertFailed();
7954 }
7955 else
7956 {
7957 /*
7958 * Allocate a temporary stack slot and spill the register to it.
7959 */
7960 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7961 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7962 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7963 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7964 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7965 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7966 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7967 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7968 }
7969 } while (fHstRegs);
7970 }
7971#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7972
7973 /*
7974 * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
7975 * which would be more difficult due to them spanning multiple stack slots and having different sizes
7976 * (besides, we only have a limited amount of slots at the moment).
7977 *
7978 * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted by
7979 * the callee. This asserts that the registers were written back earlier and are not in the dirty state.
7980 */
7981 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7982
7983 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7984 if (fHstRegs)
7985 {
7986 do
7987 {
7988 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7989 fHstRegs &= ~RT_BIT_32(idxHstReg);
7990
7991 /* Fixed reserved and temporary registers don't need saving. */
7992 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7993 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7994 continue;
7995
7996 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7997
7998 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7999 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8000 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8001 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8002 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8003 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8004 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8005 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8006 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8007 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8008 {
8009 case kIemNativeVarKind_Stack:
8010 {
8011 /* Temporarily spill the variable register. */
8012 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8013 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8014 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8015 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8016 if (cbVar == sizeof(RTUINT128U))
8017 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8018 else
8019 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8020 continue;
8021 }
8022
8023 case kIemNativeVarKind_Immediate:
8024 case kIemNativeVarKind_VarRef:
8025 case kIemNativeVarKind_GstRegRef:
8026 /* It is weird to have any of these loaded at this point. */
8027 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8028 continue;
8029
8030 case kIemNativeVarKind_End:
8031 case kIemNativeVarKind_Invalid:
8032 break;
8033 }
8034 AssertFailed();
8035 } while (fHstRegs);
8036 }
8037#endif
8038 return off;
8039}
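/* Illustrative usage sketch (not compiled): how a TLB-miss code path would typically
   bracket a helper call with the save/restore pair above and below; pfnHelper is a
   placeholder for the actual helper and fHstRegsNotToSave for the caller's mask. */
#if 0
    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
#endif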
8040
8041
8042/**
8043 * Emit code to restore volatile registers after a call to a helper.
8044 *
8045 * @returns New @a off value.
8046 * @param pReNative The recompiler state.
8047 * @param off The code buffer position.
8048 * @param fHstRegsNotToSave Set of registers not to save & restore.
8049 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8050 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8051 */
8052DECL_HIDDEN_THROW(uint32_t)
8053iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8054{
8055 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
8056 if (fHstRegs)
8057 {
8058 do
8059 {
8060 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8061 fHstRegs &= ~RT_BIT_32(idxHstReg);
8062
8063 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8064 {
8065 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8066 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8067 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8068 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8069 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8070 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8071 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8072 {
8073 case kIemNativeVarKind_Stack:
8074 {
8075 /* Unspill the variable register. */
8076 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8077 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8078 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8079 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8080 continue;
8081 }
8082
8083 case kIemNativeVarKind_Immediate:
8084 case kIemNativeVarKind_VarRef:
8085 case kIemNativeVarKind_GstRegRef:
8086 /* It is weird to have any of these loaded at this point. */
8087 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8088 continue;
8089
8090 case kIemNativeVarKind_End:
8091 case kIemNativeVarKind_Invalid:
8092 break;
8093 }
8094 AssertFailed();
8095 }
8096 else
8097 {
8098 /*
8099 * Restore from temporary stack slot.
8100 */
8101 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8102 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8103 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8104 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8105
8106 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8107 }
8108 } while (fHstRegs);
8109 }
8110#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8111 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8112 if (fHstRegs)
8113 {
8114 do
8115 {
8116 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8117 fHstRegs &= ~RT_BIT_32(idxHstReg);
8118
8119 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8120 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8121 continue;
8122 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8123
8124 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8125 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8126 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8127 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8128 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8129 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8130 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8131 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8132 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8133 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8134 {
8135 case kIemNativeVarKind_Stack:
8136 {
8137 /* Unspill the variable register. */
8138 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8139 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8140 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8141 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8142
8143 if (cbVar == sizeof(RTUINT128U))
8144 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8145 else
8146 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8147 continue;
8148 }
8149
8150 case kIemNativeVarKind_Immediate:
8151 case kIemNativeVarKind_VarRef:
8152 case kIemNativeVarKind_GstRegRef:
8153 /* It is weird to have any of these loaded at this point. */
8154 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8155 continue;
8156
8157 case kIemNativeVarKind_End:
8158 case kIemNativeVarKind_Invalid:
8159 break;
8160 }
8161 AssertFailed();
8162 } while (fHstRegs);
8163 }
8164#endif
8165 return off;
8166}
8167
8168
8169/**
8170 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8171 *
8172 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8173 *
8174 * ASSUMES that @a idxVar is valid and unpacked.
8175 */
8176DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8177{
8178 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8179 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8180 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8181 {
8182 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8183 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8184 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
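        /* Example: stack slots are uint64_t sized, so a 16 byte (RTUINT128U) variable takes
           cSlots=2 and fAllocMask=0x3, which is then shifted by idxStackSlot when testing
           and clearing the allocation bitmap below. */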
8185 Assert(cSlots > 0);
8186 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8187 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8188 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8189 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8190 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8191 }
8192 else
8193 Assert(idxStackSlot == UINT8_MAX);
8194}
8195
8196
8197/**
8198 * Worker that frees a single variable.
8199 *
8200 * ASSUMES that @a idxVar is valid and unpacked.
8201 */
8202DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8203{
8204 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8205 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8206 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8207
8208 /* Free the host register first if any assigned. */
8209 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8210#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8211 if ( idxHstReg != UINT8_MAX
8212 && pReNative->Core.aVars[idxVar].fSimdReg)
8213 {
8214 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8215 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8216 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8217 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8218 }
8219 else
8220#endif
8221 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8222 {
8223 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8224 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8225 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8226 }
8227
8228 /* Free argument mapping. */
8229 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8230 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8231 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8232
8233 /* Free the stack slots. */
8234 iemNativeVarFreeStackSlots(pReNative, idxVar);
8235
8236 /* Free the actual variable. */
8237 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8238 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8239}
8240
8241
8242/**
8243 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8244 */
8245DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8246{
8247 while (bmVars != 0)
8248 {
8249 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8250 bmVars &= ~RT_BIT_32(idxVar);
8251
8252#if 1 /** @todo optimize by simplifying this later... */
8253 iemNativeVarFreeOneWorker(pReNative, idxVar);
8254#else
8255 /* Only need to free the host register, the rest is done as bulk updates below. */
8256 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8257 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8258 {
8259 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8260 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8261 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8262 }
8263#endif
8264 }
8265#if 0 /** @todo optimize by simplifying this later... */
8266 pReNative->Core.bmVars = 0;
8267 pReNative->Core.bmStack = 0;
8268 pReNative->Core.u64ArgVars = UINT64_MAX;
8269#endif
8270}
8271
8272
8273
8274/*********************************************************************************************************************************
8275* Emitters for IEM_MC_CALL_CIMPL_XXX *
8276*********************************************************************************************************************************/
8277
8278/**
8279 * Emits code to load a reference to the given guest register into @a idxGprDst.
8280 */
8281DECL_HIDDEN_THROW(uint32_t)
8282iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8283 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8284{
8285#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8286    /** @todo If we are ever going to allow referencing the RIP register, we need to update the guest value here. */
8287#endif
8288
8289 /*
8290 * Get the offset relative to the CPUMCTX structure.
8291 */
8292 uint32_t offCpumCtx;
8293 switch (enmClass)
8294 {
8295 case kIemNativeGstRegRef_Gpr:
8296 Assert(idxRegInClass < 16);
8297 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8298 break;
8299
8300 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8301 Assert(idxRegInClass < 4);
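            /* AH/CH/DH/BH are the high bytes of the first four GPR entries (rax/rcx/rdx/rbx),
               hence the indexing by whole CPUMCTXGREG entries from aGRegs[0].bHi. */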
8302 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8303 break;
8304
8305 case kIemNativeGstRegRef_EFlags:
8306 Assert(idxRegInClass == 0);
8307 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8308 break;
8309
8310 case kIemNativeGstRegRef_MxCsr:
8311 Assert(idxRegInClass == 0);
8312 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8313 break;
8314
8315 case kIemNativeGstRegRef_FpuReg:
8316 Assert(idxRegInClass < 8);
8317 AssertFailed(); /** @todo what kind of indexing? */
8318 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8319 break;
8320
8321 case kIemNativeGstRegRef_MReg:
8322 Assert(idxRegInClass < 8);
8323 AssertFailed(); /** @todo what kind of indexing? */
8324 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8325 break;
8326
8327 case kIemNativeGstRegRef_XReg:
8328 Assert(idxRegInClass < 16);
8329 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8330 break;
8331
8332        case kIemNativeGstRegRef_X87: /* Not actually a register, but we would just duplicate code otherwise. */
8333 Assert(idxRegInClass == 0);
8334 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8335 break;
8336
8337        case kIemNativeGstRegRef_XState: /* Not actually a register, but we would just duplicate code otherwise. */
8338 Assert(idxRegInClass == 0);
8339 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8340 break;
8341
8342 default:
8343 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8344 }
8345
8346 /*
8347 * Load the value into the destination register.
8348 */
8349#ifdef RT_ARCH_AMD64
8350 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8351
8352#elif defined(RT_ARCH_ARM64)
8353 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8354 Assert(offCpumCtx < 4096);
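    /* This emits "add idxGprDst, <fixed PCPUMCTX reg>, #offCpumCtx" using the 12-bit unsigned
       immediate form, hence the offCpumCtx < 4096 assertion above. */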
8355 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8356
8357#else
8358# error "Port me!"
8359#endif
8360
8361 return off;
8362}
8363
8364
8365/**
8366 * Common code for CIMPL and AIMPL calls.
8367 *
8368 * These are calls that use argument variables and such.  They should not be
8369 * confused with internal calls required to implement an MC operation,
8370 * such as a TLB load and the like.
8371 *
8372 * Upon return all that is left to do is to load any hidden arguments and
8373 * perform the call. All argument variables are freed.
8374 *
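 * @note    Rough, illustrative usage sketch only; the concrete emitter helpers and the
 *          hidden argument loading differ per caller, and pfnHelper is just a placeholder:
 * @code
 *      off = iemNativeEmitCallCommon(pReNative, off, cArgs /* total incl. hidden *&#47;, cHiddenArgs);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
 *                                        IEMNATIVE_REG_FIXED_PVMCPU); // typical hidden arg 0: pVCpu
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 * @endcode
 *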
8375 * @returns New code buffer offset; throws VBox status code on error.
8376 * @param pReNative The native recompile state.
8377 * @param off The code buffer offset.
8378 * @param   cArgs               The total number of arguments (includes hidden
8379 * count).
8380 * @param cHiddenArgs The number of hidden arguments. The hidden
8381 * arguments must not have any variable declared for
8382 * them, whereas all the regular arguments must
8383 * (tstIEMCheckMc ensures this).
8384 * @param   fFlushPendingWrites Whether to flush pending writes (default true);
8385 *                              even when false, pending writes in call volatile registers are still flushed.
8386 */
8387DECL_HIDDEN_THROW(uint32_t)
8388iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8389 bool fFlushPendingWrites /*= true*/)
8390{
8391#ifdef VBOX_STRICT
8392 /*
8393 * Assert sanity.
8394 */
8395 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8396 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8397 for (unsigned i = 0; i < cHiddenArgs; i++)
8398 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8399 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8400 {
8401 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8402 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8403 }
8404 iemNativeRegAssertSanity(pReNative);
8405#endif
8406
8407 /* We don't know what the called function makes use of, so flush any pending register writes. */
8408 RT_NOREF(fFlushPendingWrites);
8409#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8410 if (fFlushPendingWrites)
8411#endif
8412 off = iemNativeRegFlushPendingWrites(pReNative, off);
8413
8414 /*
8415 * Before we do anything else, go over variables that are referenced and
8416 * make sure they are not in a register.
8417 */
8418 uint32_t bmVars = pReNative->Core.bmVars;
8419 if (bmVars)
8420 {
8421 do
8422 {
8423 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8424 bmVars &= ~RT_BIT_32(idxVar);
8425
8426 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8427 {
8428 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8429#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8430 if ( idxRegOld != UINT8_MAX
8431 && pReNative->Core.aVars[idxVar].fSimdReg)
8432 {
8433 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8434 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8435
8436 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8437 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8438 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8439 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8440 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8441 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8442 else
8443 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8444
8445 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8446 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8447
8448 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8449 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8450 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8451 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8452 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8453 }
8454 else
8455#endif
8456 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8457 {
8458 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8459 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8460 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8461 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8462 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8463
8464 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8465 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8466 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8467 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8468 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8469 }
8470 }
8471 } while (bmVars != 0);
8472#if 0 //def VBOX_STRICT
8473 iemNativeRegAssertSanity(pReNative);
8474#endif
8475 }
8476
8477 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
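    /* Note: g_aidxIemNativeCallRegs[i] gives the host register used for argument number i,
             while g_afIemNativeCallRegs[n] is the combined mask of the first n argument
             registers; both are used for the argument shuffling below. */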
8478
8479#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8480 /*
8481     * As the very first step, go over the host registers that will be used for arguments
8482     * and make sure they don't shadow anything which needs writing back first.
8483 */
8484 for (uint32_t i = 0; i < cRegArgs; i++)
8485 {
8486 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8487
8488 /* Writeback any dirty guest shadows before using this register. */
8489 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8490 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8491 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8492 }
8493#endif
8494
8495 /*
8496 * First, go over the host registers that will be used for arguments and make
8497 * sure they either hold the desired argument or are free.
8498 */
8499 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8500 {
8501 for (uint32_t i = 0; i < cRegArgs; i++)
8502 {
8503 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8504 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8505 {
8506 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8507 {
8508 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8509 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8510 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8511 Assert(pVar->idxReg == idxArgReg);
8512 uint8_t const uArgNo = pVar->uArgNo;
8513 if (uArgNo == i)
8514                    { /* perfect */ }
8515 /* The variable allocator logic should make sure this is impossible,
8516 except for when the return register is used as a parameter (ARM,
8517 but not x86). */
8518#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8519 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8520 {
8521# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8522# error "Implement this"
8523# endif
8524 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8525 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8526 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8527 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8528 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8529 }
8530#endif
8531 else
8532 {
8533 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8534
8535 if (pVar->enmKind == kIemNativeVarKind_Stack)
8536 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8537 else
8538 {
8539 /* just free it, can be reloaded if used again */
8540 pVar->idxReg = UINT8_MAX;
8541 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8542 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8543 }
8544 }
8545 }
8546 else
8547 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8548 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8549 }
8550 }
8551#if 0 //def VBOX_STRICT
8552 iemNativeRegAssertSanity(pReNative);
8553#endif
8554 }
8555
8556 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8557
8558#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8559 /*
8560 * If there are any stack arguments, make sure they are in their place as well.
8561 *
8562     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8563     * the caller) will be loading it later and it must be free (see the first loop).
8564 */
8565 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8566 {
8567 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8568 {
8569 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8570 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8571 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8572 {
8573 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8574 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8575 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8576 pVar->idxReg = UINT8_MAX;
8577 }
8578 else
8579 {
8580 /* Use ARG0 as temp for stuff we need registers for. */
8581 switch (pVar->enmKind)
8582 {
8583 case kIemNativeVarKind_Stack:
8584 {
8585 uint8_t const idxStackSlot = pVar->idxStackSlot;
8586 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8587 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8588 iemNativeStackCalcBpDisp(idxStackSlot));
8589 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8590 continue;
8591 }
8592
8593 case kIemNativeVarKind_Immediate:
8594 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8595 continue;
8596
8597 case kIemNativeVarKind_VarRef:
8598 {
8599 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8600 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8601 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8602 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8603 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8604# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8605 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8606 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8607 if ( fSimdReg
8608 && idxRegOther != UINT8_MAX)
8609 {
8610 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8611 if (cbVar == sizeof(RTUINT128U))
8612 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8613 else
8614 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8615 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8616 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8617 }
8618 else
8619# endif
8620 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8621 {
8622 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8623 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8624 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8625 }
8626 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8627 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8628 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8629 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8630 continue;
8631 }
8632
8633 case kIemNativeVarKind_GstRegRef:
8634 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8635 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8636 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8637 continue;
8638
8639 case kIemNativeVarKind_Invalid:
8640 case kIemNativeVarKind_End:
8641 break;
8642 }
8643 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8644 }
8645 }
8646# if 0 //def VBOX_STRICT
8647 iemNativeRegAssertSanity(pReNative);
8648# endif
8649 }
8650#else
8651 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8652#endif
8653
8654 /*
8655 * Make sure the argument variables are loaded into their respective registers.
8656 *
8657 * We can optimize this by ASSUMING that any register allocations are for
8658     * registers that have already been loaded and are ready.  The previous step
8659 * saw to that.
8660 */
8661 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8662 {
8663 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8664 {
8665 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8666 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8667 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8668 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8669 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8670 else
8671 {
8672 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8673 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8674 {
8675 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8676 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8677 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8678 | RT_BIT_32(idxArgReg);
8679 pVar->idxReg = idxArgReg;
8680 }
8681 else
8682 {
8683 /* Use ARG0 as temp for stuff we need registers for. */
8684 switch (pVar->enmKind)
8685 {
8686 case kIemNativeVarKind_Stack:
8687 {
8688 uint8_t const idxStackSlot = pVar->idxStackSlot;
8689 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8690 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8691 continue;
8692 }
8693
8694 case kIemNativeVarKind_Immediate:
8695 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8696 continue;
8697
8698 case kIemNativeVarKind_VarRef:
8699 {
8700 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8701 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8702 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8703 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8704 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8705 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8706#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8707 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8708 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8709 if ( fSimdReg
8710 && idxRegOther != UINT8_MAX)
8711 {
8712 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8713 if (cbVar == sizeof(RTUINT128U))
8714 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8715 else
8716 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8717 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8718 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8719 }
8720 else
8721#endif
8722 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8723 {
8724 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8725 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8726 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8727 }
8728 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8729 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8730 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8731 continue;
8732 }
8733
8734 case kIemNativeVarKind_GstRegRef:
8735 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8736 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8737 continue;
8738
8739 case kIemNativeVarKind_Invalid:
8740 case kIemNativeVarKind_End:
8741 break;
8742 }
8743 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8744 }
8745 }
8746 }
8747#if 0 //def VBOX_STRICT
8748 iemNativeRegAssertSanity(pReNative);
8749#endif
8750 }
8751#ifdef VBOX_STRICT
8752 else
8753 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8754 {
8755 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8756 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8757 }
8758#endif
8759
8760 /*
8761 * Free all argument variables (simplified).
8762 * Their lifetime always expires with the call they are for.
8763 */
8764 /** @todo Make the python script check that arguments aren't used after
8765 * IEM_MC_CALL_XXXX. */
8766    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8767     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8768     *        an argument value.  There is also some FPU stuff. */
8769 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8770 {
8771 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8772 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8773
8774 /* no need to free registers: */
8775 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8776 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8777 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8778 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8779 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8780 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8781
8782 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8783 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8784 iemNativeVarFreeStackSlots(pReNative, idxVar);
8785 }
8786 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8787
8788 /*
8789 * Flush volatile registers as we make the call.
8790 */
8791 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8792
8793 return off;
8794}
8795
8796
8797
8798/*********************************************************************************************************************************
8799* TLB Lookup. *
8800*********************************************************************************************************************************/
8801
8802/**
8803 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8804 */
8805DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint64_t uSegAndSizeAndAccessAndDisp)
8806{
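    /* uSegAndSizeAndAccessAndDisp packing, as decoded below: bits 7:0 = iSegReg (UINT8_MAX
       for a flat address), bits 15:8 = cbMem, bits 31:16 = fAccess, bits 39:32 = offDisp. */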
8807 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccessAndDisp);
8808 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccessAndDisp);
8809 uint32_t const fAccess = (uint32_t)uSegAndSizeAndAccessAndDisp >> 16;
8810 uint8_t const offDisp = RT_BYTE5(uSegAndSizeAndAccessAndDisp);
8811 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64+%#x LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, offDisp, cbMem, fAccess, uResult));
8812
8813 /* Do the lookup manually. */
8814 RTGCPTR const GCPtrFlat = (iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base) + offDisp;
8815 uint64_t const uTagNoRev = IEMTLB_CALC_TAG_NO_REV(GCPtrFlat);
8816 PCIEMTLBENTRY pTlbe = IEMTLB_TAG_TO_EVEN_ENTRY(&pVCpu->iem.s.DataTlb, uTagNoRev);
8817 if (RT_LIKELY( pTlbe->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevision)
8818 || (pTlbe = pTlbe + 1)->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevisionGlobal)))
8819 {
8820 /*
8821 * Check TLB page table level access flags.
8822 */
8823 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
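        /* (IEM_GET_CPL(pVCpu) + 1) & 4 yields IEMTLBE_F_PT_NO_USER only for CPL 3 (3+1=4) and
           zero for CPL 0..2, i.e. only CPL 3 accesses get rejected on pages marked no-user. */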
8824 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8825 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8826 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8827 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8828 | IEMTLBE_F_PG_UNASSIGNED
8829 | IEMTLBE_F_PT_NO_ACCESSED
8830 | fNoWriteNoDirty | fNoUser);
8831 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8832 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8833 {
8834 /*
8835 * Return the address.
8836 */
8837 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8838 if ((uintptr_t)pbAddr == uResult)
8839 return;
8840 RT_NOREF(cbMem);
8841 AssertFailed();
8842 }
8843 else
8844 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8845 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8846 }
8847 else
8848 AssertFailed();
8849 RT_BREAKPOINT();
8850}
8851
8852/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8853
8854
8855
8856/*********************************************************************************************************************************
8857* Recompiler Core. *
8858*********************************************************************************************************************************/
8859
8860/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8861static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8862{
8863 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8864 pDis->cbCachedInstr += cbMaxRead;
8865 RT_NOREF(cbMinRead);
8866 return VERR_NO_DATA;
8867}
8868
8869
8870DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8871{
8872 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8873 {
8874#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8875 ENTRY(fLocalForcedActions),
8876 ENTRY(iem.s.rcPassUp),
8877 ENTRY(iem.s.fExec),
8878 ENTRY(iem.s.pbInstrBuf),
8879 ENTRY(iem.s.uInstrBufPc),
8880 ENTRY(iem.s.GCPhysInstrBuf),
8881 ENTRY(iem.s.cbInstrBufTotal),
8882 ENTRY(iem.s.idxTbCurInstr),
8883 ENTRY(iem.s.fSkippingEFlags),
8884#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
8885 ENTRY(iem.s.uPcUpdatingDebug),
8886#endif
8887#ifdef VBOX_WITH_STATISTICS
8888 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8889 ENTRY(iem.s.StatNativeTlbHitsForStore),
8890 ENTRY(iem.s.StatNativeTlbHitsForStack),
8891 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8892 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8893 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8894 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8895 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8896#endif
8897 ENTRY(iem.s.DataTlb.uTlbRevision),
8898 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8899 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8900 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8901 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8902 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8903 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8904 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8905 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8906 ENTRY(iem.s.DataTlb.aEntries),
8907 ENTRY(iem.s.CodeTlb.uTlbRevision),
8908 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8909 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8910 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8911 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8912 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8913 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8914 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8915 ENTRY(iem.s.CodeTlb.aEntries),
8916 ENTRY(pVMR3),
8917 ENTRY(cpum.GstCtx.rax),
8918 ENTRY(cpum.GstCtx.ah),
8919 ENTRY(cpum.GstCtx.rcx),
8920 ENTRY(cpum.GstCtx.ch),
8921 ENTRY(cpum.GstCtx.rdx),
8922 ENTRY(cpum.GstCtx.dh),
8923 ENTRY(cpum.GstCtx.rbx),
8924 ENTRY(cpum.GstCtx.bh),
8925 ENTRY(cpum.GstCtx.rsp),
8926 ENTRY(cpum.GstCtx.rbp),
8927 ENTRY(cpum.GstCtx.rsi),
8928 ENTRY(cpum.GstCtx.rdi),
8929 ENTRY(cpum.GstCtx.r8),
8930 ENTRY(cpum.GstCtx.r9),
8931 ENTRY(cpum.GstCtx.r10),
8932 ENTRY(cpum.GstCtx.r11),
8933 ENTRY(cpum.GstCtx.r12),
8934 ENTRY(cpum.GstCtx.r13),
8935 ENTRY(cpum.GstCtx.r14),
8936 ENTRY(cpum.GstCtx.r15),
8937 ENTRY(cpum.GstCtx.es.Sel),
8938 ENTRY(cpum.GstCtx.es.u64Base),
8939 ENTRY(cpum.GstCtx.es.u32Limit),
8940 ENTRY(cpum.GstCtx.es.Attr),
8941 ENTRY(cpum.GstCtx.cs.Sel),
8942 ENTRY(cpum.GstCtx.cs.u64Base),
8943 ENTRY(cpum.GstCtx.cs.u32Limit),
8944 ENTRY(cpum.GstCtx.cs.Attr),
8945 ENTRY(cpum.GstCtx.ss.Sel),
8946 ENTRY(cpum.GstCtx.ss.u64Base),
8947 ENTRY(cpum.GstCtx.ss.u32Limit),
8948 ENTRY(cpum.GstCtx.ss.Attr),
8949 ENTRY(cpum.GstCtx.ds.Sel),
8950 ENTRY(cpum.GstCtx.ds.u64Base),
8951 ENTRY(cpum.GstCtx.ds.u32Limit),
8952 ENTRY(cpum.GstCtx.ds.Attr),
8953 ENTRY(cpum.GstCtx.fs.Sel),
8954 ENTRY(cpum.GstCtx.fs.u64Base),
8955 ENTRY(cpum.GstCtx.fs.u32Limit),
8956 ENTRY(cpum.GstCtx.fs.Attr),
8957 ENTRY(cpum.GstCtx.gs.Sel),
8958 ENTRY(cpum.GstCtx.gs.u64Base),
8959 ENTRY(cpum.GstCtx.gs.u32Limit),
8960 ENTRY(cpum.GstCtx.gs.Attr),
8961 ENTRY(cpum.GstCtx.rip),
8962 ENTRY(cpum.GstCtx.eflags),
8963 ENTRY(cpum.GstCtx.uRipInhibitInt),
8964 ENTRY(cpum.GstCtx.cr0),
8965 ENTRY(cpum.GstCtx.cr4),
8966 ENTRY(cpum.GstCtx.aXcr[0]),
8967 ENTRY(cpum.GstCtx.aXcr[1]),
8968#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8969 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8970 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8971 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8972 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8973 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8974 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8975 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8976 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8977 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8978 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8979 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8980 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8981 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8982 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8983 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8984 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8985 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8986 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8987 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8988 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8989 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8990 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8991 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8992 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8993 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8994 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8995 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8996 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8997 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8998 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8999 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
9000 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
9001 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
9002#endif
9003#undef ENTRY
9004 };
9005#ifdef VBOX_STRICT
9006 static bool s_fOrderChecked = false;
9007 if (!s_fOrderChecked)
9008 {
9009 s_fOrderChecked = true;
9010 uint32_t offPrev = s_aMembers[0].off;
9011 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
9012 {
9013 Assert(s_aMembers[i].off > offPrev);
9014 offPrev = s_aMembers[i].off;
9015 }
9016 }
9017#endif
9018
9019 /*
9020 * Binary lookup.
9021 */
9022 unsigned iStart = 0;
9023 unsigned iEnd = RT_ELEMENTS(s_aMembers);
9024 for (;;)
9025 {
9026 unsigned const iCur = iStart + (iEnd - iStart) / 2;
9027 uint32_t const offCur = s_aMembers[iCur].off;
9028 if (off < offCur)
9029 {
9030 if (iCur != iStart)
9031 iEnd = iCur;
9032 else
9033 break;
9034 }
9035 else if (off > offCur)
9036 {
9037 if (iCur + 1 < iEnd)
9038 iStart = iCur + 1;
9039 else
9040 break;
9041 }
9042 else
9043 return s_aMembers[iCur].pszName;
9044 }
9045#ifdef VBOX_WITH_STATISTICS
9046 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9047 return "iem.s.acThreadedFuncStats[iFn]";
9048#endif
9049 return NULL;
9050}
9051
9052
9053/**
9054 * Translates a label to a name.
9055 */
9056static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode /*= false*/)
9057{
9058 switch (enmLabel)
9059 {
9060#define STR_CASE_CMN(a_Label) case kIemNativeLabelType_ ## a_Label: return fCommonCode ? "Chunk_" #a_Label : #a_Label;
9061 STR_CASE_CMN(Invalid);
9062 STR_CASE_CMN(RaiseDe);
9063 STR_CASE_CMN(RaiseUd);
9064 STR_CASE_CMN(RaiseSseRelated);
9065 STR_CASE_CMN(RaiseAvxRelated);
9066 STR_CASE_CMN(RaiseSseAvxFpRelated);
9067 STR_CASE_CMN(RaiseNm);
9068 STR_CASE_CMN(RaiseGp0);
9069 STR_CASE_CMN(RaiseMf);
9070 STR_CASE_CMN(RaiseXf);
9071 STR_CASE_CMN(ObsoleteTb);
9072 STR_CASE_CMN(NeedCsLimChecking);
9073 STR_CASE_CMN(CheckBranchMiss);
9074#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9075 STR_CASE_CMN(ReturnSuccess);
9076#else
9077 STR_CASE_CMN(Return);
9078#endif
9079 STR_CASE_CMN(ReturnBreak);
9080 STR_CASE_CMN(ReturnBreakFF);
9081 STR_CASE_CMN(ReturnWithFlags);
9082 STR_CASE_CMN(ReturnBreakViaLookup);
9083 STR_CASE_CMN(ReturnBreakViaLookupWithIrq);
9084 STR_CASE_CMN(ReturnBreakViaLookupWithTlb);
9085 STR_CASE_CMN(ReturnBreakViaLookupWithTlbAndIrq);
9086 STR_CASE_CMN(NonZeroRetOrPassUp);
9087#undef STR_CASE_CMN
9088#define STR_CASE_LBL(a_Label) case kIemNativeLabelType_ ## a_Label: return #a_Label;
9089 STR_CASE_LBL(LoopJumpTarget);
9090 STR_CASE_LBL(If);
9091 STR_CASE_LBL(Else);
9092 STR_CASE_LBL(Endif);
9093 STR_CASE_LBL(CheckIrq);
9094 STR_CASE_LBL(TlbLookup);
9095 STR_CASE_LBL(TlbMiss);
9096 STR_CASE_LBL(TlbDone);
9097 case kIemNativeLabelType_End: break;
9098 }
9099 return NULL;
9100}
9101
9102
9103/** Info for the symbols resolver used when disassembling. */
9104typedef struct IEMNATIVDISASMSYMCTX
9105{
9106 PVMCPU pVCpu;
9107 PCIEMTB pTb;
9108# ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9109 PCIEMNATIVEPERCHUNKCTX pCtx;
9110# endif
9111# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9112 PCIEMTBDBG pDbgInfo;
9113# endif
9114} IEMNATIVDISASMSYMCTX;
9115typedef IEMNATIVDISASMSYMCTX *PIEMNATIVDISASMSYMCTX;
9116
9117
9118/**
9119 * Resolve address to symbol, if we can.
9120 */
9121static const char *iemNativeDisasmGetSymbol(PIEMNATIVDISASMSYMCTX pSymCtx, uintptr_t uAddress, char *pszBuf, size_t cbBuf)
9122{
9123#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE)
9124 PCIEMTB const pTb = pSymCtx->pTb;
9125 uintptr_t const offNative = (uAddress - (uintptr_t)pTb->Native.paInstructions) / sizeof(IEMNATIVEINSTR);
9126 if (offNative <= pTb->Native.cInstructions)
9127 {
9128# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9129 /*
9130 * Scan debug info for a matching label.
9131 * Since the debug info should be 100% linear, we can do a binary search here.
9132 */
9133 PCIEMTBDBG const pDbgInfo = pSymCtx->pDbgInfo;
9134 if (pDbgInfo)
9135 {
9136 uint32_t const cEntries = pDbgInfo->cEntries;
9137 uint32_t idxEnd = cEntries;
9138 uint32_t idxStart = 0;
9139 for (;;)
9140 {
9141 /* Find a NativeOffset record close to the midpoint. */
9142 uint32_t idx = idxStart + (idxEnd - idxStart) / 2;
9143 while (idx > idxStart && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9144 idx--;
9145 if (pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9146 {
9147 idx = idxStart + (idxEnd - idxStart) / 2 + 1;
9148 while (idx < idxEnd && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9149 idx++;
9150 if (idx >= idxEnd)
9151 break;
9152 }
9153
9154 /* Do the binary searching thing. */
9155 if (offNative < pDbgInfo->aEntries[idx].NativeOffset.offNative)
9156 {
9157 if (idx > idxStart)
9158 idxEnd = idx;
9159 else
9160 break;
9161 }
9162 else if (offNative > pDbgInfo->aEntries[idx].NativeOffset.offNative)
9163 {
9164 idx += 1;
9165 if (idx < idxEnd)
9166 idxStart = idx;
9167 else
9168 break;
9169 }
9170 else
9171 {
9172 /* Got a matching offset, scan forward till we hit a label, but
9173 stop when the native offset changes. */
9174 while (++idx < cEntries)
9175 switch (pDbgInfo->aEntries[idx].Gen.uType)
9176 {
9177 case kIemTbDbgEntryType_Label:
9178 {
9179 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)pDbgInfo->aEntries[idx].Label.enmLabel;
9180 const char * const pszName = iemNativeGetLabelName(enmLabel);
9181 if (enmLabel < kIemNativeLabelType_FirstWithMultipleInstances)
9182 return pszName;
9183 RTStrPrintf(pszBuf, cbBuf, "%s_%u", pszName, pDbgInfo->aEntries[idx].Label.uData);
9184 return pszBuf;
9185 }
9186
9187 case kIemTbDbgEntryType_NativeOffset:
9188 if (pDbgInfo->aEntries[idx].NativeOffset.offNative != offNative)
9189 return NULL;
9190 break;
9191 }
9192 break;
9193 }
9194 }
9195 }
9196# endif
9197 }
9198# ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9199 else
9200 {
9201 PCIEMNATIVEPERCHUNKCTX const pChunkCtx = pSymCtx->pCtx;
9202 if (pChunkCtx)
9203 for (uint32_t i = 1; i < RT_ELEMENTS(pChunkCtx->apExitLabels); i++)
9204 if ((PIEMNATIVEINSTR)uAddress == pChunkCtx->apExitLabels[i])
9205 return iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true /*fCommonCode*/);
9206 }
9207# endif
9208#endif
9209 RT_NOREF(pSymCtx, uAddress, pszBuf, cbBuf);
9210 return NULL;
9211}
9212
9213#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9214
9215/**
9216 * @callback_method_impl{FNDISGETSYMBOL}
9217 */
9218static DECLCALLBACK(int) iemNativeDisasmGetSymbolCb(PCDISSTATE pDis, uint32_t u32Sel, RTUINTPTR uAddress,
9219 char *pszBuf, size_t cchBuf, RTINTPTR *poff, void *pvUser)
9220{
9221 const char * const pszSym = iemNativeDisasmGetSymbol((PIEMNATIVDISASMSYMCTX)pvUser, uAddress, pszBuf, cchBuf);
9222 if (pszSym)
9223 {
9224 *poff = 0;
9225 if (pszSym != pszBuf)
9226 return RTStrCopy(pszBuf, cchBuf, pszSym);
9227 return VINF_SUCCESS;
9228 }
9229 RT_NOREF(pDis, u32Sel);
9230 return VERR_SYMBOL_NOT_FOUND;
9231}
9232
9233#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9234
9235/**
9236 * Annotates an instruction decoded by the capstone disassembler.
9237 */
9238static const char *
9239iemNativeDisasmAnnotateCapstone(PIEMNATIVDISASMSYMCTX pSymCtx, cs_insn const *pInstr, char *pszBuf, size_t cchBuf)
9240{
9241# if defined(RT_ARCH_ARM64)
9242 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9243 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9244 {
9245        /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9246 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9247 char const *psz = strchr(pInstr->op_str, '[');
9248 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9249 {
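            /* x28 is the fixed pVCpu pointer and x27 the fixed &pVCpu->cpum.GstCtx pointer
               (see the AssertCompile above), so x27 based displacements are translated into
               VMCPU relative offsets before the name lookup. */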
9250            uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9251 int32_t off = -1;
9252 psz += 4;
9253 if (*psz == ']')
9254 off = 0;
9255 else if (*psz == ',')
9256 {
9257 psz = RTStrStripL(psz + 1);
9258 if (*psz == '#')
9259 off = RTStrToInt32(&psz[1]);
9260 /** @todo deal with index registers and LSL as well... */
9261 }
9262 if (off >= 0)
9263 return iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9264 }
9265 }
9266 else if (pInstr->id == ARM64_INS_B || pInstr->id == ARM64_INS_BL)
9267 {
9268 const char *pszAddr = strchr(pInstr->op_str, '#');
9269 if (pszAddr)
9270 {
9271 uint64_t uAddr = RTStrToUInt64(pszAddr + 1);
9272 if (uAddr != 0)
9273 return iemNativeDisasmGetSymbol(pSymCtx, uAddr, pszBuf, cchBuf);
9274 }
9275 }
9276# endif
9277 RT_NOREF(pSymCtx, pInstr, pszBuf, cchBuf);
9278 return NULL;
9279}
9280#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9281
9282
9283DECLHIDDEN(void) iemNativeDisassembleTb(PVMCPU pVCpu, PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9284{
9285 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9286#if defined(RT_ARCH_AMD64)
9287 static const char * const a_apszMarkers[] =
9288 {
9289 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9290 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9291 };
9292#endif
9293
9294 char szDisBuf[512];
9295 DISSTATE Dis;
9296 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9297 uint32_t const cNative = pTb->Native.cInstructions;
9298 uint32_t offNative = 0;
9299#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9300 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9301#endif
9302 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9303 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9304 : DISCPUMODE_64BIT;
9305#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9306# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9307 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb), pDbgInfo };
9308# else
9309 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb) };
9310# endif
9311#elif defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
9312 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, pDbgInfo };
9313#else
9314 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb };
9315#endif
9316#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9317 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9318#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9319 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9320#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9321# error "Port me"
9322#else
9323 csh hDisasm = ~(size_t)0;
9324# if defined(RT_ARCH_AMD64)
9325 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9326# elif defined(RT_ARCH_ARM64)
9327 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9328# else
9329# error "Port me"
9330# endif
9331 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9332
9333 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9334 //Assert(rcCs == CS_ERR_OK);
9335#endif
9336
9337 /*
9338 * Print TB info.
9339 */
9340 pHlp->pfnPrintf(pHlp,
9341 "pTb=%p: GCPhysPc=%RGp (%%%RGv) cInstructions=%u LB %#x cRanges=%u\n"
9342 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9343 pTb, pTb->GCPhysPc,
9344#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9345 pTb->pDbgInfo ? pTb->pDbgInfo->FlatPc : RTGCPTR_MAX,
9346#else
9347 pTb->FlatPc,
9348#endif
9349 pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9350 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9351#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9352 if (pDbgInfo && pDbgInfo->cEntries > 1)
9353 {
9354 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9355
9356 /*
9357 * This disassembly is driven by the debug info which follows the native
9358         * code and indicates where the next guest instruction starts,
9359         * where labels are, and such things.
9360 */
9361 uint32_t idxThreadedCall = 0;
9362 uint32_t idxGuestInstr = 0;
9363 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9364 uint8_t idxRange = UINT8_MAX;
9365 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9366 uint32_t offRange = 0;
9367 uint32_t offOpcodes = 0;
9368 uint32_t const cbOpcodes = pTb->cbOpcodes;
9369 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9370 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9371 uint32_t iDbgEntry = 1;
9372 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9373
9374 while (offNative < cNative)
9375 {
9376 /* If we're at or have passed the point where the next chunk of debug
9377 info starts, process it. */
9378 if (offDbgNativeNext <= offNative)
9379 {
9380 offDbgNativeNext = UINT32_MAX;
9381 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9382 {
9383 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9384 {
9385 case kIemTbDbgEntryType_GuestInstruction:
9386 {
9387 /* Did the exec flag change? */
9388 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9389 {
9390 pHlp->pfnPrintf(pHlp,
9391 " fExec change %#08x -> %#08x %s\n",
9392 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9393 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9394 szDisBuf, sizeof(szDisBuf)));
9395 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9396 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9397 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9398 : DISCPUMODE_64BIT;
9399 }
9400
9401                            /* New opcode range?  We need to fend off a spurious debug info entry here for cases
9402 where the compilation was aborted before the opcode was recorded and the actual
9403 instruction was translated to a threaded call. This may happen when we run out
9404 of ranges, or when some complicated interrupts/FFs are found to be pending or
9405 similar. So, we just deal with it here rather than in the compiler code as it
9406 is a lot simpler to do here. */
9407 if ( idxRange == UINT8_MAX
9408 || idxRange >= cRanges
9409 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9410 {
9411 idxRange += 1;
9412 if (idxRange < cRanges)
9413 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9414 else
9415 continue;
9416 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9417 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9418 + (pTb->aRanges[idxRange].idxPhysPage == 0
9419 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9420 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9421 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9422 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9423 pTb->aRanges[idxRange].idxPhysPage);
9424 GCPhysPc += offRange;
9425 }
9426
9427 /* Disassemble the instruction. */
9428 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
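                            /* Note: 15 is the architectural maximum x86/AMD64 instruction length. */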
9429 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9430 uint32_t cbInstr = 1;
9431 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9432 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9433 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9434 if (RT_SUCCESS(rc))
9435 {
9436 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9437 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9438 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9439 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9440
9441 static unsigned const s_offMarker = 55;
9442 static char const s_szMarker[] = " ; <--- guest";
9443 if (cch < s_offMarker)
9444 {
9445 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9446 cch = s_offMarker;
9447 }
9448 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9449 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9450
9451 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %s #%u\n", GCPhysPc, szDisBuf, idxGuestInstr);
9452 }
9453 else
9454 {
9455 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9456 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9457 cbInstr = 1;
9458 }
9459 idxGuestInstr++;
9460 GCPhysPc += cbInstr;
9461 offOpcodes += cbInstr;
9462 offRange += cbInstr;
9463 continue;
9464 }
9465
9466 case kIemTbDbgEntryType_ThreadedCall:
9467 pHlp->pfnPrintf(pHlp,
9468 " Call #%u to %s (%u args) - %s\n",
9469 idxThreadedCall,
9470 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9471 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9472 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9473 idxThreadedCall++;
9474 continue;
9475
9476 case kIemTbDbgEntryType_GuestRegShadowing:
9477 {
9478 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9479 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9480 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9481 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9482 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9483 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9484 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9485 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9486 else
9487 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9488 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9489 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9490 continue;
9491 }
9492
9493#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9494 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9495 {
9496 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9497 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9498 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9499 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9500 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9501 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9502 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9503 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9504 else
9505 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9506 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9507 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9508 continue;
9509 }
9510#endif
9511
9512 case kIemTbDbgEntryType_Label:
9513 {
9514 const char *pszName = iemNativeGetLabelName((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel);
9515 if (pDbgInfo->aEntries[iDbgEntry].Label.enmLabel >= kIemNativeLabelType_FirstWithMultipleInstances)
9516 {
9517 const char *pszComment = pDbgInfo->aEntries[iDbgEntry].Label.enmLabel == kIemNativeLabelType_Else
9518 ? " ; regs state restored pre-if-block" : "";
9519 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9520 }
9521 else
9522 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9523 continue;
9524 }
9525
9526 case kIemTbDbgEntryType_NativeOffset:
9527 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9528 Assert(offDbgNativeNext >= offNative);
9529 break;
9530
9531#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9532 case kIemTbDbgEntryType_DelayedPcUpdate:
9533 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9534 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9535 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9536 continue;
9537#endif
9538
9539#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9540 case kIemTbDbgEntryType_GuestRegDirty:
9541 {
9542 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9543 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9544 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9545 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9546 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9547 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9548 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9549 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9550 pszGstReg, pszHstReg);
9551 continue;
9552 }
9553
9554 case kIemTbDbgEntryType_GuestRegWriteback:
9555                            pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX64)\n",
9556 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9557 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9558 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9559 continue;
9560#endif
9561
9562 default:
9563 AssertFailed();
9564 }
9565 iDbgEntry++;
9566 break;
9567 }
9568 }
9569
9570 /*
9571 * Disassemble the next native instruction.
9572 */
9573 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9574# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9575 uint32_t cbInstr = sizeof(paNative[0]);
9576 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9577 if (RT_SUCCESS(rc))
9578 {
9579# if defined(RT_ARCH_AMD64)
9580 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9581 {
9582 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
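                /* Marker payload, as decoded below: uInfo is either a threaded call marker
                   (high word = threaded function number, bits 14:0 = call number, bit 15 =
                   recompiled flag) or, with bit 31 masked off, an index into a_apszMarkers. */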
9583 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9584 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9585 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9586 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9587 uInfo & 0x8000 ? "recompiled" : "todo");
9588 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9589 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9590 else
9591 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9592 }
9593 else
9594# endif
9595 {
9596 const char *pszAnnotation = NULL;
9597# ifdef RT_ARCH_AMD64
9598 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9599 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9600 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9601 iemNativeDisasmGetSymbolCb, &SymCtx);
9602 PCDISOPPARAM pMemOp;
9603 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[0].fUse))
9604 pMemOp = &Dis.aParams[0];
9605 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[1].fUse))
9606 pMemOp = &Dis.aParams[1];
9607 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[2].fUse))
9608 pMemOp = &Dis.aParams[2];
9609 else
9610 pMemOp = NULL;
9611 if ( pMemOp
9612 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9613 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9614 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9615 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9616
9617# elif defined(RT_ARCH_ARM64)
9618 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9619 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9620 iemNativeDisasmGetSymbolCb, &SymCtx);
9621# else
9622# error "Port me"
9623# endif
9624 if (pszAnnotation)
9625 {
9626 static unsigned const s_offAnnotation = 55;
9627 size_t const cchAnnotation = strlen(pszAnnotation);
9628 size_t cchDis = strlen(szDisBuf);
9629 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9630 {
9631 if (cchDis < s_offAnnotation)
9632 {
9633 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9634 cchDis = s_offAnnotation;
9635 }
9636 szDisBuf[cchDis++] = ' ';
9637 szDisBuf[cchDis++] = ';';
9638 szDisBuf[cchDis++] = ' ';
9639 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9640 }
9641 }
9642 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9643 }
9644 }
9645 else
9646 {
9647# if defined(RT_ARCH_AMD64)
9648 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9649 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9650# elif defined(RT_ARCH_ARM64)
9651 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9652# else
9653# error "Port me"
9654# endif
9655 cbInstr = sizeof(paNative[0]);
9656 }
9657 offNative += cbInstr / sizeof(paNative[0]);
9658
9659# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9660 cs_insn *pInstr;
9661 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9662 (uintptr_t)pNativeCur, 1, &pInstr);
9663 if (cInstrs > 0)
9664 {
9665 Assert(cInstrs == 1);
9666 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9667 size_t const cchOp = strlen(pInstr->op_str);
9668# if defined(RT_ARCH_AMD64)
9669 if (pszAnnotation)
9670 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9671 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9672 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9673 else
9674 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9675 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9676
9677# else
9678 if (pszAnnotation)
9679 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9680 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9681 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9682 else
9683 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9684 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9685# endif
9686 offNative += pInstr->size / sizeof(*pNativeCur);
9687 cs_free(pInstr, cInstrs);
9688 }
9689 else
9690 {
9691# if defined(RT_ARCH_AMD64)
9692 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9693 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9694# else
9695 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9696# endif
9697 offNative++;
9698 }
9699# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9700 }
9701 }
9702 else
9703#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9704 {
9705 /*
9706 * No debug info, just disassemble the x86 code and then the native code.
9707 *
9708 * First the guest code:
9709 */
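 /* Each range stores an offset into a physical page; idxPhysPage 0 refers to
    the page GCPhysPc lives on, while higher values index aGCPhysPages[idx - 1],
    which is how the guest physical PC is reconstructed below. */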
9710 for (unsigned i = 0; i < pTb->cRanges; i++)
9711 {
9712 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9713 + (pTb->aRanges[i].idxPhysPage == 0
9714 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9715 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9716 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9717 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9718 unsigned off = pTb->aRanges[i].offOpcodes;
9719 /** @todo this ain't working when crossing pages! */
9720 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9721 while (off < cbOpcodes)
9722 {
9723 uint32_t cbInstr = 1;
9724 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9725 &pTb->pabOpcodes[off], cbOpcodes - off,
9726 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9727 if (RT_SUCCESS(rc))
9728 {
9729 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9730 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9731 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9732 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9733 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9734 GCPhysPc += cbInstr;
9735 off += cbInstr;
9736 }
9737 else
9738 {
9739 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - disassembly failure %Rrc\n",
9740 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9741 break;
9742 }
9743 }
9744 }
9745
9746 /*
9747 * Then the native code:
9748 */
9749 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9750 while (offNative < cNative)
9751 {
9752 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9753# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9754 uint32_t cbInstr = sizeof(paNative[0]);
9755 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9756 if (RT_SUCCESS(rc))
9757 {
9758# if defined(RT_ARCH_AMD64)
9759 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9760 {
9761 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9762 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9763 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9764 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9765 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9766 uInfo & 0x8000 ? "recompiled" : "todo");
9767 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9768 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9769 else
9770 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9771 }
9772 else
9773# endif
9774 {
9775# ifdef RT_ARCH_AMD64
9776 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9777 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9778 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9779 iemNativeDisasmGetSymbolCb, &SymCtx);
9780# elif defined(RT_ARCH_ARM64)
9781 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9782 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9783 iemNativeDisasmGetSymbolCb, &SymCtx);
9784# else
9785# error "Port me"
9786# endif
9787 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9788 }
9789 }
9790 else
9791 {
9792# if defined(RT_ARCH_AMD64)
9793 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9794 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9795# else
9796 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9797# endif
9798 cbInstr = sizeof(paNative[0]);
9799 }
9800 offNative += cbInstr / sizeof(paNative[0]);
9801
9802# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9803 cs_insn *pInstr;
9804 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9805 (uintptr_t)pNativeCur, 1, &pInstr);
9806 if (cInstrs > 0)
9807 {
9808 Assert(cInstrs == 1);
9809 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9810 size_t const cchOp = strlen(pInstr->op_str);
9811# if defined(RT_ARCH_AMD64)
9812 if (pszAnnotation)
9813 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9814 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9815 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9816 else
9817 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9818 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9819
9820# else
9821 if (pszAnnotation)
9822 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9823 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9824 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9825 else
9826 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9827 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9828# endif
9829 offNative += pInstr->size / sizeof(*pNativeCur);
9830 cs_free(pInstr, cInstrs);
9831 }
9832 else
9833 {
9834# if defined(RT_ARCH_AMD64)
9835 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9836 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9837# else
9838 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9839# endif
9840 offNative++;
9841 }
9842# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9843 }
9844 }
9845
9846#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9847 /* Cleanup. */
9848 cs_close(&hDisasm);
9849#endif
9850}
9851
9852
9853#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9854
9855/** Emit alignment padding between labels / functions. */
9856DECL_INLINE_THROW(uint32_t)
9857iemNativeRecompileEmitAlignmentPadding(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fAlignMask)
9858{
9859 if (off & fAlignMask)
9860 {
9861 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, fAlignMask + 1);
9862 while (off & fAlignMask)
9863# if defined(RT_ARCH_AMD64)
9864 pCodeBuf[off++] = 0xcc;
9865# elif defined(RT_ARCH_ARM64)
9866 pCodeBuf[off++] = Armv8A64MkInstrBrk(0xcccc);
9867# else
9868# error "port me"
9869# endif
9870 }
9871 return off;
9872}
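/* Note: the padding also acts as a trap should execution ever fall into it:
   0xcc is the one-byte 'int3' instruction on AMD64, and Armv8A64MkInstrBrk()
   encodes a BRK instruction on ARM64. */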
9873
9874
9875/**
9876 * Called when a new chunk is allocated to emit the common per-chunk code.
9877 *
9878 * Allocates a per-chunk context directly from the chunk itself and places the
9879 * common code there.
9880 *
9881 * @returns Pointer to the chunk context start.
9882 * @param pVCpu The cross context virtual CPU structure of the calling
9883 * thread.
9884 * @param idxChunk The index of the chunk being added and requiring a
9885 * common code context.
9886 */
9887DECLHIDDEN(PCIEMNATIVEPERCHUNKCTX) iemNativeRecompileAttachExecMemChunkCtx(PVMCPU pVCpu, uint32_t idxChunk)
9888{
9889 /*
9890 * Allocate a new recompiler state (since we're likely to be called while
9891 * the default one is fully loaded already with a recompiled TB).
9892 *
9893 * This is a bit of overkill, but this isn't a frequently used code path.
9894 */
9895 PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, NULL);
9896 AssertReturn(pReNative, NULL);
9897
9898# if defined(RT_ARCH_AMD64)
9899 uint32_t const fAlignMask = 15;
9900# elif defined(RT_ARCH_ARM64)
9901 uint32_t const fAlignMask = 31 / 4;
9902# else
9903# error "port me"
9904# endif
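 /* Rough reasoning for the masks above: offsets count IEMNATIVEINSTR units,
    which appear to be bytes on AMD64 and 32-bit instruction words on ARM64,
    so 15 yields 16-byte alignment while 31/4 = 7 yields 32-byte (8 instruction)
    alignment. */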
9905 uint32_t aoffLabels[kIemNativeLabelType_LastTbExit + 1] = {0};
9906 int rc = VINF_SUCCESS;
9907 uint32_t off = 0;
9908
9909 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9910 {
9911 /*
9912 * Emit the epilog code.
9913 */
9914 aoffLabels[kIemNativeLabelType_ReturnSuccess] = off;
9915 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
9916 uint32_t const offReturnWithStatus = off;
9917 off = iemNativeEmitCoreEpilog(pReNative, off);
9918
9919 /*
9920 * Generate special jump labels. All of these get a copy of the epilog code.
9921 */
9922 static struct
9923 {
9924 IEMNATIVELABELTYPE enmExitReason;
9925 uint32_t (*pfnEmitCore)(PIEMRECOMPILERSTATE pReNative, uint32_t off);
9926 } const s_aSpecialWithEpilogs[] =
9927 {
9928 { kIemNativeLabelType_NonZeroRetOrPassUp, iemNativeEmitCoreRcFiddling },
9929 { kIemNativeLabelType_ReturnBreak, iemNativeEmitCoreReturnBreak },
9930 { kIemNativeLabelType_ReturnBreakFF, iemNativeEmitCoreReturnBreakFF },
9931 { kIemNativeLabelType_ReturnWithFlags, iemNativeEmitCoreReturnWithFlags },
9932 };
9933 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSpecialWithEpilogs); i++)
9934 {
9935 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9936 Assert(aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] == 0);
9937 aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] = off;
9938 off = s_aSpecialWithEpilogs[i].pfnEmitCore(pReNative, off);
9939 off = iemNativeEmitCoreEpilog(pReNative, off);
9940 }
9941
9942 /*
9943 * Do what iemNativeEmitReturnBreakViaLookup does.
9944 */
9945 static struct
9946 {
9947 IEMNATIVELABELTYPE enmExitReason;
9948 uintptr_t pfnHelper;
9949 } const s_aViaLookup[] =
9950 {
9951 { kIemNativeLabelType_ReturnBreakViaLookup,
9952 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/> },
9953 { kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
9954 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/> },
9955 { kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
9956 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/> },
9957 { kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
9958 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/> },
9959 };
9960 uint32_t const offReturnBreak = aoffLabels[kIemNativeLabelType_ReturnBreak]; Assert(offReturnBreak != 0);
9961 for (uint32_t i = 0; i < RT_ELEMENTS(s_aViaLookup); i++)
9962 {
9963 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9964 Assert(aoffLabels[s_aViaLookup[i].enmExitReason] == 0);
9965 aoffLabels[s_aViaLookup[i].enmExitReason] = off;
9966 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, s_aViaLookup[i].pfnHelper);
9967 }
9968
9969 /*
9970 * Generate simple TB tail labels that just call a helper with a pVCpu
9971 * arg and either return or longjmp/throw a non-zero status.
9972 */
9973 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9974 static struct
9975 {
9976 IEMNATIVELABELTYPE enmExitReason;
9977 bool fWithEpilog;
9978 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9979 } const s_aSimpleTailLabels[] =
9980 {
9981 { kIemNativeLabelType_RaiseDe, false, iemNativeHlpExecRaiseDe },
9982 { kIemNativeLabelType_RaiseUd, false, iemNativeHlpExecRaiseUd },
9983 { kIemNativeLabelType_RaiseSseRelated, false, iemNativeHlpExecRaiseSseRelated },
9984 { kIemNativeLabelType_RaiseAvxRelated, false, iemNativeHlpExecRaiseAvxRelated },
9985 { kIemNativeLabelType_RaiseSseAvxFpRelated, false, iemNativeHlpExecRaiseSseAvxFpRelated },
9986 { kIemNativeLabelType_RaiseNm, false, iemNativeHlpExecRaiseNm },
9987 { kIemNativeLabelType_RaiseGp0, false, iemNativeHlpExecRaiseGp0 },
9988 { kIemNativeLabelType_RaiseMf, false, iemNativeHlpExecRaiseMf },
9989 { kIemNativeLabelType_RaiseXf, false, iemNativeHlpExecRaiseXf },
9990 { kIemNativeLabelType_ObsoleteTb, true, iemNativeHlpObsoleteTb },
9991 { kIemNativeLabelType_NeedCsLimChecking, true, iemNativeHlpNeedCsLimChecking },
9992 { kIemNativeLabelType_CheckBranchMiss, true, iemNativeHlpCheckBranchMiss },
9993 };
9994 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSimpleTailLabels); i++)
9995 {
9996 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9997 Assert(!aoffLabels[s_aSimpleTailLabels[i].enmExitReason]);
9998 aoffLabels[s_aSimpleTailLabels[i].enmExitReason] = off;
9999
10000 /* int pfnCallback(PVMCPUCC pVCpu) */
10001 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10002 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)s_aSimpleTailLabels[i].pfnCallback);
10003
10004 /* If the callback is supposed to return with a status code we inline the epilog
10005 sequence for better speed. Otherwise, if the callback shouldn't return because
10006 it throws/longjmps, we just jump to the return sequence to be on the safe side. */
10007 if (s_aSimpleTailLabels[i].fWithEpilog)
10008 off = iemNativeEmitCoreEpilog(pReNative, off);
10009 else
10010 {
10011# ifdef VBOX_STRICT
10012 off = iemNativeEmitBrk(pReNative, off, 0x2201);
10013# endif
10014 off = iemNativeEmitJmpToFixed(pReNative, off, offReturnWithStatus);
10015 }
10016 }
10017
10018
10019# ifdef VBOX_STRICT
10020 /* Make sure we've generated code for all labels. */
10021 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(aoffLabels); i++)
10022 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
10023# endif
10024 }
10025 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10026 {
10027 Log(("iemNativeRecompileAttachExecMemChunkCtx: Caught %Rrc while recompiling!\n", rc));
10028 iemNativeTerm(pReNative);
10029 return NULL;
10030 }
10031 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10032
10033 /*
10034 * Allocate memory for the context (first) and the common code (last).
10035 */
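 /* Layout of the allocation made below: the context structure comes first,
    padded up to 64 bytes (cbCtx), with the generated common code copied in
    right after it (cbCode). The RX alias returned by the allocator is then
    advanced past the context so the exit label pointers refer to executable
    addresses. */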
10036 PIEMNATIVEPERCHUNKCTX pCtx;
10037 uint32_t const cbCtx = RT_ALIGN_32(sizeof(*pCtx), 64);
10038 uint32_t const cbCode = off * sizeof(IEMNATIVEINSTR);
10039 PIEMNATIVEINSTR paFinalCommonCodeRx = NULL;
10040 pCtx = (PIEMNATIVEPERCHUNKCTX)iemExecMemAllocatorAllocFromChunk(pVCpu, idxChunk, cbCtx + cbCode, &paFinalCommonCodeRx);
10041 AssertLogRelMsgReturn(pCtx, ("cbCtx=%#x cbCode=%#x idxChunk=%#x\n", cbCtx, cbCode, idxChunk), NULL);
10042
10043 /*
10044 * Copy over the generated code.
10045 * There should be no fixups or labels defined here.
10046 */
10047 paFinalCommonCodeRx = (PIEMNATIVEINSTR)((uintptr_t)paFinalCommonCodeRx + cbCtx);
10048 memcpy((PIEMNATIVEINSTR)((uintptr_t)pCtx + cbCtx), pReNative->pInstrBuf, cbCode);
10049
10050 Assert(pReNative->cFixups == 0);
10051 Assert(pReNative->cLabels == 0);
10052
10053 /*
10054 * Initialize the context.
10055 */
10056 AssertCompile(kIemNativeLabelType_Invalid == 0);
10057 AssertCompile(RT_ELEMENTS(pCtx->apExitLabels) == RT_ELEMENTS(aoffLabels));
10058 pCtx->apExitLabels[kIemNativeLabelType_Invalid] = 0;
10059 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(pCtx->apExitLabels); i++)
10060 {
10061 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
10062 pCtx->apExitLabels[i] = &paFinalCommonCodeRx[aoffLabels[i]];
10063 Log10((" apExitLabels[%u]=%p %s\n", i, pCtx->apExitLabels[i], iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true)));
10064 }
10065
10066 iemExecMemAllocatorReadyForUse(pVCpu, pCtx, cbCtx + cbCode);
10067
10068 iemNativeTerm(pReNative);
10069 return pCtx;
10070}
10071
10072#endif /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
10073
10074/**
10075 * Recompiles the given threaded TB into a native one.
10076 *
10077 * In case of failure the translation block will be returned as-is.
10078 *
10079 * @returns pTb.
10080 * @param pVCpu The cross context virtual CPU structure of the calling
10081 * thread.
10082 * @param pTb The threaded translation block to recompile to native.
10083 */
10084DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
10085{
10086#if 0 /* For profiling the native recompiler code. */
10087l_profile_again:
10088#endif
10089 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
10090
10091 /*
10092 * The first time through, we allocate the recompiler state and save it;
10093 * all the other times we'll just reuse the saved one after a quick reset.
10094 */
10095 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
10096 if (RT_LIKELY(pReNative))
10097 iemNativeReInit(pReNative, pTb);
10098 else
10099 {
10100 pReNative = iemNativeInit(pVCpu, pTb);
10101 AssertReturn(pReNative, pTb);
10102 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative; /* save it */
10103 }
10104
10105#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10106 /*
10107 * First do liveness analysis. This is done backwards.
10108 */
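 /* Sketch of the pass: the final entry is initialized as all-unused, then the
    calls are walked from last to first, each liveness worker deriving the
    entry for the preceding call from the one following it. The forward
    recompilation pass later consults these entries to tell which guest
    registers and flags the remaining calls still need. */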
10109 {
10110 uint32_t idxCall = pTb->Thrd.cCalls;
10111 if (idxCall <= pReNative->cLivenessEntriesAlloc)
10112 { /* likely */ }
10113 else
10114 {
10115 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
10116 while (idxCall > cAlloc)
10117 cAlloc *= 2;
10118 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
10119 AssertReturn(pvNew, pTb);
10120 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
10121 pReNative->cLivenessEntriesAlloc = cAlloc;
10122 }
10123 AssertReturn(idxCall > 0, pTb);
10124 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
10125
10126 /* The initial (final) entry. */
10127 idxCall--;
10128 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
10129
10130 /* Loop backwards through the calls and fill in the other entries. */
10131 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
10132 while (idxCall > 0)
10133 {
10134 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
10135 Assert(pfnLiveness);
10136 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
10137 pCallEntry--;
10138 idxCall--;
10139 }
10140 }
10141#endif
10142
10143 /*
10144 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10145 * for aborting if an error happens.
10146 */
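 /* Rough shape of the error handling below: IEMNATIVE_TRY_SETJMP establishes
    the recovery point, the emitters bail out by longjmp'ing / throwing a VBox
    status code, and the IEMNATIVE_CATCH_LONGJMP_BEGIN/END pair at the bottom
    receives it in rc so we can return pTb unchanged on failure. */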
10147 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10148#ifdef LOG_ENABLED
10149 uint32_t const cCallsOrg = cCallsLeft;
10150#endif
10151 uint32_t off = 0;
10152 int rc = VINF_SUCCESS;
10153 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10154 {
10155#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
10156 /*
10157 * Emit prolog code (fixed).
10158 */
10159 off = iemNativeEmitProlog(pReNative, off);
10160#endif
10161
10162 /*
10163 * Convert the calls to native code.
10164 */
10165#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10166 int32_t iGstInstr = -1;
10167#endif
10168#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10169 uint32_t cThreadedCalls = 0;
10170 uint32_t cRecompiledCalls = 0;
10171#endif
10172#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10173 uint32_t idxCurCall = 0;
10174#endif
10175 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10176 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10177 while (cCallsLeft-- > 0)
10178 {
10179 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10180#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10181 pReNative->idxCurCall = idxCurCall;
10182#endif
10183
10184#ifdef IEM_WITH_INTRA_TB_JUMPS
10185 /*
10186 * Define a label for jump targets (currently only the first entry).
10187 */
10188 if (!(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET))
10189 { /* likely */ }
10190 else
10191 {
10192 iemNativeLabelCreate(pReNative, kIemNativeLabelType_LoopJumpTarget, off);
10193 Assert(idxCurCall == 0); /** @todo when jumping elsewhere, we have to save the register state. */
10194 }
10195#endif
10196
10197 /*
10198 * Debug info, assembly markup and statistics.
10199 */
10200#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10201 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10202 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10203#endif
10204#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10205 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10206 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10207 {
10208 if (iGstInstr < (int32_t)pTb->cInstructions)
10209 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10210 else
10211 Assert(iGstInstr == pTb->cInstructions);
10212 iGstInstr = pCallEntry->idxInstr;
10213 }
10214 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10215#endif
10216#if defined(VBOX_STRICT)
10217 off = iemNativeEmitMarker(pReNative, off,
10218 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
10219#endif
10220#if defined(VBOX_STRICT)
10221 iemNativeRegAssertSanity(pReNative);
10222#endif
10223#ifdef VBOX_WITH_STATISTICS
10224 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10225#endif
10226
10227#if 0
10228 if ( pTb->GCPhysPc == 0x00000000000c1240
10229 && idxCurCall == 67)
10230 off = iemNativeEmitBrk(pReNative, off, 0xf000);
10231#endif
10232
10233 /*
10234 * Actual work.
10235 */
10236 Log2(("%u[%u]: %s%s (off=%#x)\n", idxCurCall, pCallEntry->idxInstr,
10237 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)", off));
10238 if (pfnRecom) /** @todo stats on this. */
10239 {
10240 off = pfnRecom(pReNative, off, pCallEntry);
10241 STAM_REL_STATS({cRecompiledCalls++;});
10242 }
10243 else
10244 {
10245 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10246 STAM_REL_STATS({cThreadedCalls++;});
10247 }
10248 Assert(off <= pReNative->cInstrBufAlloc);
10249 Assert(pReNative->cCondDepth == 0);
10250
10251#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10252 if (LogIs2Enabled())
10253 {
10254 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10255# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10256 static const char s_achState[] = "CUXI";
10257# else
10258 /* 0123 4567 89ab cdef */
10259 /* CCCC CCCC */
10260 /* WWWW WWWW */
10261 /* RR RR RR RR */
10262 /* P P P P P P P P */
10263 static const char s_achState[] = "UxRr" "WwMm" "CcQq" "KkNn";
10264# endif
10265
10266 char szGpr[17];
10267 for (unsigned i = 0; i < 16; i++)
10268 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10269 szGpr[16] = '\0';
10270
10271 char szSegBase[X86_SREG_COUNT + 1];
10272 char szSegLimit[X86_SREG_COUNT + 1];
10273 char szSegAttrib[X86_SREG_COUNT + 1];
10274 char szSegSel[X86_SREG_COUNT + 1];
10275 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10276 {
10277 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10278 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10279 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10280 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10281 }
10282 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10283 = szSegSel[X86_SREG_COUNT] = '\0';
10284
10285 char szEFlags[8];
10286 for (unsigned i = 0; i < 7; i++)
10287 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10288 szEFlags[7] = '\0';
10289
10290 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10291 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10292 }
10293#endif
10294
10295 /*
10296 * Advance.
10297 */
10298 pCallEntry++;
10299#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10300 idxCurCall++;
10301#endif
10302 }
10303
10304 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10305 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10306 if (!cThreadedCalls)
10307 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10308
10309#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
10310 Assert(pReNative->fSkippingEFlags == 0);
10311#endif
10312#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
10313 Assert(pReNative->fPostponingEFlags == 0);
10314#endif
10315
10316#ifdef VBOX_WITH_STATISTICS
10317 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
10318#endif
10319
10320 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
10321 off = iemNativeRegFlushPendingWrites(pReNative, off);
10322
10323#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10324 /*
10325 * Successful return, so clear the return register (eax, w0).
10326 */
10327 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
10328
10329 /*
10330 * Emit the epilog code.
10331 */
10332 uint32_t idxReturnLabel;
10333 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
10334#else
10335 /*
10336 * Jump to the common per-chunk epilog code.
10337 */
10338 //off = iemNativeEmitBrk(pReNative, off, 0x1227);
10339 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnSuccess);
10340#endif
10341
10342#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10343 /*
10344 * Generate special jump labels.
10345 */
10346 off = iemNativeEmitRcFiddling(pReNative, off, idxReturnLabel);
10347
10348 bool const fReturnBreakViaLookup = RT_BOOL( pReNative->bmLabelTypes
10349 & ( RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookup)
10350 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithIrq)
10351 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlb)
10352 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq)));
10353 if (fReturnBreakViaLookup)
10354 {
10355 uint32_t const idxReturnBreakLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
10356 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10357 off = iemNativeEmitReturnBreakViaLookup(pReNative, off, idxReturnBreakLabel);
10358 }
10359 else if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
10360 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10361
10362 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreakFF))
10363 off = iemNativeEmitReturnBreakFF(pReNative, off, idxReturnLabel);
10364
10365 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
10366 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
10367
10368 /*
10369 * Generate simple TB tail labels that just call a helper with a pVCpu
10370 * arg and either return or longjmp/throw a non-zero status.
10371 *
10372 * The array entries must be ordered by enmLabel value so we can index
10373 * using fTailLabels bit numbers.
10374 */
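 /* How the indexing works below: bmLabelTypes has one bit per label type with
    bit 0 (kIemNativeLabelType_Invalid) masked off; ASMBitFirstSetU64 returns
    the 1-based position of the lowest set bit, so subtracting one recovers the
    IEMNATIVELABELTYPE value, which doubles as the g_aSimpleTailLabels index
    thanks to the ordering requirement above. */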
10375 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
10376 static struct
10377 {
10378 IEMNATIVELABELTYPE enmLabel;
10379 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
10380 } const g_aSimpleTailLabels[] =
10381 {
10382 { kIemNativeLabelType_Invalid, NULL },
10383 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
10384 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
10385 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
10386 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
10387 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
10388 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
10389 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
10390 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
10391 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
10392 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
10393 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
10394 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
10395 };
10396
10397 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
10398 AssertCompile(kIemNativeLabelType_Invalid == 0);
10399 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
10400 if (fTailLabels)
10401 {
10402 do
10403 {
10404 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10405 fTailLabels &= ~RT_BIT_64(enmLabel);
10406 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
10407
10408 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10409 Assert(idxLabel != UINT32_MAX);
10410 if (idxLabel != UINT32_MAX)
10411 {
10412 iemNativeLabelDefine(pReNative, idxLabel, off);
10413
10414 /* int pfnCallback(PVMCPUCC pVCpu) */
10415 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10416 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
10417
10418 /* jump back to the return sequence. */
10419 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
10420 }
10421
10422 } while (fTailLabels);
10423 }
10424
10425#else /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
10426 /*
10427 * Generate tail labels with jumps to the common per-chunk code.
10428 */
10429# ifndef RT_ARCH_AMD64
10430 Assert(!(pReNative->bmLabelTypes & ( RT_BIT_64(kIemNativeLabelType_ReturnSuccess)
10431 | RT_BIT_64(kIemNativeLabelType_Invalid) )));
10432 AssertCompile(kIemNativeLabelType_Invalid == 0);
10433 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1U) - 2U);
10434 if (fTailLabels)
10435 {
10436 do
10437 {
10438 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10439 fTailLabels &= ~RT_BIT_64(enmLabel);
10440
10441 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10442 AssertContinue(idxLabel != UINT32_MAX);
10443 iemNativeLabelDefine(pReNative, idxLabel, off);
10444 off = iemNativeEmitTbExit(pReNative, off, enmLabel);
10445 } while (fTailLabels);
10446 }
10447# else
10448 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1) - 1U))); /* Should not be used! */
10449# endif
10450#endif /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
10451 }
10452 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10453 {
10454 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10455 return pTb;
10456 }
10457 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10458 Assert(off <= pReNative->cInstrBufAlloc);
10459
10460 /*
10461 * Make sure all labels have been defined.
10462 */
10463 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10464#ifdef VBOX_STRICT
10465 uint32_t const cLabels = pReNative->cLabels;
10466 for (uint32_t i = 0; i < cLabels; i++)
10467 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10468#endif
10469
10470#if 0 /* For profiling the native recompiler code. */
10471 if (pTb->Thrd.cCalls >= 136)
10472 {
10473 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10474 goto l_profile_again;
10475 }
10476#endif
10477
10478 /*
10479 * Allocate executable memory, copy over the code we've generated.
10480 */
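 /* The allocator appears to hand back two views of the same memory:
    paFinalInstrBuf is the writable one we memcpy into and patch via the
    fixups below, while paFinalInstrBufRx is the executable alias the TB will
    run from (it is what pTb->Native.paInstructions ends up pointing to and
    what the TB-exit displacements are measured against). */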
10481 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10482 if (pTbAllocator->pDelayedFreeHead)
10483 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10484
10485 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
10486#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10487 PCIEMNATIVEPERCHUNKCTX pCtx = NULL;
10488 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10489 &paFinalInstrBufRx, &pCtx);
10490
10491#else
10492 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10493 &paFinalInstrBufRx, NULL);
10494#endif
10495 AssertReturn(paFinalInstrBuf, pTb);
10496 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10497
10498 /*
10499 * Apply fixups.
10500 */
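 /* Each fixup patches a label-relative displacement into the copied code:
    on AMD64/X86 a 32-bit relative operand gets labelOff - fixupOff plus the
    addend, while on ARM64 the imm26/imm19/imm14 field of the branch
    instruction is rewritten in place. All offsets are in IEMNATIVEINSTR
    units, matching how 'off' is counted during emission. */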
10501 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10502 uint32_t const cFixups = pReNative->cFixups;
10503 for (uint32_t i = 0; i < cFixups; i++)
10504 {
10505 Assert(paFixups[i].off < off);
10506 Assert(paFixups[i].idxLabel < cLabels);
10507 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10508 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10509 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10510 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10511 switch (paFixups[i].enmType)
10512 {
10513#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10514 case kIemNativeFixupType_Rel32:
10515 Assert(paFixups[i].off + 4 <= off);
10516 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10517 continue;
10518
10519#elif defined(RT_ARCH_ARM64)
10520 case kIemNativeFixupType_RelImm26At0:
10521 {
10522 Assert(paFixups[i].off < off);
10523 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10524 Assert(offDisp >= -33554432 && offDisp < 33554432);
10525 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10526 continue;
10527 }
10528
10529 case kIemNativeFixupType_RelImm19At5:
10530 {
10531 Assert(paFixups[i].off < off);
10532 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10533 Assert(offDisp >= -262144 && offDisp < 262144);
10534 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10535 continue;
10536 }
10537
10538 case kIemNativeFixupType_RelImm14At5:
10539 {
10540 Assert(paFixups[i].off < off);
10541 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10542 Assert(offDisp >= -8192 && offDisp < 8192);
10543 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10544 continue;
10545 }
10546
10547#endif
10548 case kIemNativeFixupType_Invalid:
10549 case kIemNativeFixupType_End:
10550 break;
10551 }
10552 AssertFailed();
10553 }
10554
10555#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10556 /*
10557 * Apply TB exit fixups.
10558 */
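 /* These resolve jumps from the TB into the common per-chunk tail code: the
    target comes from pCtx->apExitLabels (set up when the chunk context was
    attached) and the displacement is measured against the RX alias of this
    TB's buffer, since that is where the branch will actually execute. */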
10559 PIEMNATIVEEXITFIXUP const paTbExitFixups = pReNative->paTbExitFixups;
10560 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
10561 for (uint32_t i = 0; i < cTbExitFixups; i++)
10562 {
10563 Assert(paTbExitFixups[i].off < off);
10564 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(paTbExitFixups[i].enmExitReason));
10565 RTPTRUNION const Ptr = { &paFinalInstrBuf[paTbExitFixups[i].off] };
10566
10567# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10568 Assert(paTbExitFixups[i].off + 4 <= off);
10569 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off + 4];
10570 Assert(offDisp >= INT32_MIN && offDisp <= INT32_MAX);
10571 *Ptr.pi32 = (int32_t)offDisp;
10572
10573# elif defined(RT_ARCH_ARM64)
10574 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off];
10575 Assert(offDisp >= -33554432 && offDisp < 33554432);
10576 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10577
10578# else
10579# error "Port me!"
10580# endif
10581 }
10582#endif
10583
10584 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
10585 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10586
10587 /*
10588 * Convert the translation block.
10589 */
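 /* From here on the TB is native: the threaded call table is freed, the type
    flag is switched to IEMTB_F_TYPE_NATIVE, and paInstructions/cInstructions
    describe the executable copy (counted in IEMNATIVEINSTR units). */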
10590 RTMemFree(pTb->Thrd.paCalls);
10591 pTb->Native.paInstructions = paFinalInstrBufRx;
10592 pTb->Native.cInstructions = off;
10593 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10594#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10595 pReNative->pDbgInfo->FlatPc = pTb->FlatPc;
10596 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10597 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10598#endif
10599
10600 Assert(pTbAllocator->cThreadedTbs > 0);
10601 pTbAllocator->cThreadedTbs -= 1;
10602 pTbAllocator->cNativeTbs += 1;
10603 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10604
10605#ifdef LOG_ENABLED
10606 /*
10607 * Disassemble to the log if enabled.
10608 */
10609 if (LogIs3Enabled())
10610 {
10611 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10612 iemNativeDisassembleTb(pVCpu, pTb, DBGFR3InfoLogHlp());
10613# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10614 RTLogFlush(NULL);
10615# endif
10616 }
10617#endif
10618 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10619
10620 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10621 return pTb;
10622}
10623