VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp

Last change on this file was 108791, checked in by vboxsync on 2025-03-28

VMM/IEM: More ARM target work. jiraref:VBP-1598

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 457.2 KB
1/* $Id: IEMAllN8veRecompiler.cpp 108791 2025-03-28 21:58:31Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : Delayed PC updating.
11 * - Level 5 (Log5) : Postponed and skipped EFLAGS calculations.
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#define VBOX_DIS_WITH_ARMV8
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/dbgf.h>
55#include <VBox/vmm/tm.h>
56#include "IEMInternal.h"
57#include <VBox/vmm/vmcc.h>
58#include <VBox/log.h>
59#include <VBox/err.h>
60#include <VBox/dis.h>
61#include <VBox/param.h>
62#include <iprt/assert.h>
63#include <iprt/mem.h>
64#include <iprt/string.h>
65#if defined(RT_ARCH_AMD64)
66# include <iprt/x86.h>
67#elif defined(RT_ARCH_ARM64)
68# include <iprt/armv8.h>
69#endif
70
71#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
72# include "/opt/local/include/capstone/capstone.h"
73#endif
74
75#include "IEMInline.h"
76#include "IEMThreadedFunctions.h"
77#include "IEMN8veRecompiler.h"
78#include "IEMN8veRecompilerEmit.h"
79#include "IEMN8veRecompilerTlbLookup.h"
80#include "IEMNativeFunctions.h"
81#include "target-x86/IEMAllN8veEmit-x86.h"
82
83
84/*
85 * Narrow down configs here to avoid wasting time on unused configs.
86 * Note! Same checks in IEMAllThrdRecompiler.cpp.
87 */
88
89#ifndef IEM_WITH_CODE_TLB
90# error The code TLB must be enabled for the recompiler.
91#endif
92
93#ifndef IEM_WITH_DATA_TLB
94# error The data TLB must be enabled for the recompiler.
95#endif
96
97/** @todo eliminate this clang build hack. */
98#if RT_CLANG_PREREQ(4, 0)
99# pragma GCC diagnostic ignored "-Wunused-function"
100#endif
101
102
103/*********************************************************************************************************************************
104* Internal Functions *
105*********************************************************************************************************************************/
106#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
107static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
108#endif
109DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
110DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
111 IEMNATIVEGSTREG enmGstReg, uint32_t off);
112DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
113static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode = false);
114
115
116
117/*********************************************************************************************************************************
118* Native Recompilation *
119*********************************************************************************************************************************/
120
121
122/**
123 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
124 */
125IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
126{
127 pVCpu->iem.s.cInstructions += idxInstr;
128 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
129}
130
131
132/**
133 * Helper for iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb: checks whether an IRQ or other force flag needs servicing.
134 */
135DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
136{
137 uint64_t fCpu = pVCpu->fLocalForcedActions;
138 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
139 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
140 | VMCPU_FF_TLB_FLUSH
141 | VMCPU_FF_UNHALT );
142 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
143 if (RT_LIKELY( ( !fCpu
144 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
145 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
146 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
147 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
148 return false;
149 return true;
150}
151
152
153/**
154 * Used by TB code at a TB exit with the physical PC already known: looks up the next TB and returns its native code address for direct linking, or 0 to go back to the outer loop.
155 */
156template<bool const a_fWithIrqCheck>
157IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
158 uint32_t fFlags, RTGCPHYS GCPhysPc))
159{
160 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
161 Assert(idxTbLookup < pTb->cTbLookupEntries);
162 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
163#if 1
164 PIEMTB const pNewTb = *ppNewTb;
165 if (pNewTb)
166 {
167# ifdef VBOX_STRICT
168 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
169 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
170 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
171 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
172 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
173 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
174# endif
175 if (pNewTb->GCPhysPc == GCPhysPc)
176 {
177# ifdef VBOX_STRICT
178 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
179 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
180 fAssertFlags |= IEMTB_F_X86_INHIBIT_SHADOW;
181 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
182 fAssertFlags |= IEMTB_F_X86_INHIBIT_NMI;
183# if 1 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. */
184 Assert(IEM_F_MODE_X86_IS_FLAT(fFlags));
185# else
186 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
187 {
188 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
189 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
190 fAssertFlags |= IEMTB_F_X86_CS_LIM_CHECKS;
191 }
192# endif
193 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
194 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
195 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
196#endif
197
198 /*
199 * Check that the flags (key + type) match.
200 */
201 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
202 {
203 /*
204 * Check for interrupts and stuff.
205 */
206 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
207 * The main problems are the statistics and to some degree the logging. :/ */
208 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
209 {
210 /* Do polling. */
211 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
212 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
213 {
214 /*
215 * Success. Update statistics and switch to the next TB.
216 */
217 if (a_fWithIrqCheck)
218 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
219 else
220 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
221
222 pNewTb->cUsed += 1;
223 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
224 pVCpu->iem.s.pCurTbR3 = pNewTb;
225 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
226 pVCpu->iem.s.cTbExecNative += 1;
227 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
228 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
229 return (uintptr_t)pNewTb->Native.paInstructions;
230 }
231 }
232 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
233 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
234 }
235 else
236 {
237 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
238 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
239 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
240 }
241 }
242 else
243 {
244 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
245 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
246 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
247 }
248 }
249 else
250 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
251#else
252 NOREF(GCPhysPc);
253#endif
254
255 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
256 return 0;
257}
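/*
 * Rough sketch of how the recompiled TB exit code is expected to consume the
 * return value of this lookup helper and the WithTlb variant below (the exact
 * sequence is emitted by the recompiler; this is illustration only):
 *
 *     uintptr_t const pfnNext = iemNativeHlpReturnBreakViaLookup<a_fWithIrqCheck>(pVCpu, idxTbLookup, fFlags, GCPhysPc);
 *     if (pfnNext)
 *         jump to pfnNext;                  // continue directly in the next native TB
 *     else
 *         return VINF_IEM_REEXEC_BREAK;     // back to the outer loop, which can use the
 *                                           // ppTbLookupEntryR3 set above to link up later.
 */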
258
259
260/**
261 * Used by TB code at a TB exit: resolves the physical PC via the code TLB, looks up the next TB and returns its native code address for direct linking, or 0 to go back to the outer loop.
262 */
263template <bool const a_fWithIrqCheck>
264IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
265{
266 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
267 Assert(idxTbLookup < pTb->cTbLookupEntries);
268 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
269#if 1
270 PIEMTB const pNewTb = *ppNewTb;
271 if (pNewTb)
272 {
273 /*
274 * Calculate the flags for the next TB and check if they match.
275 */
276 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
277 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
278 { /* likely */ }
279 else
280 {
281 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
282 fFlags |= IEMTB_F_X86_INHIBIT_SHADOW;
283 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
284 fFlags |= IEMTB_F_X86_INHIBIT_NMI;
285 }
286 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
287 {
288 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
289 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
290 { /* likely */ }
291 else
292 fFlags |= IEMTB_F_X86_CS_LIM_CHECKS;
293 }
294 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
295
296 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
297 {
298 /*
299 * Do the TLB lookup for flat RIP and compare the result with the next TB.
300 *
301 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
302 */
303 /* Calc the effective PC. */
304 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
305 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
306 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
307
308 /* Advance within the current buffer (PAGE) when possible. */
309 RTGCPHYS GCPhysPc;
310 uint64_t off;
311 if ( pVCpu->iem.s.pbInstrBuf
312 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
313 {
314 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
315 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
316 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
317 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
318 else
319 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
320 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
321 }
322 else
323 {
324 pVCpu->iem.s.pbInstrBuf = NULL;
325 pVCpu->iem.s.offCurInstrStart = 0;
326 pVCpu->iem.s.offInstrNextByte = 0;
327 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
328 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
329 }
330
331 if (pNewTb->GCPhysPc == GCPhysPc)
332 {
333 /*
334 * Check for interrupts and stuff.
335 */
336 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
337 * The main problems are the statistics and to some degree the logging. :/ */
338 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
339 {
340 /* Do polling. */
341 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
342 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
343 {
344 /*
345 * Success. Update statistics and switch to the next TB.
346 */
347 if (a_fWithIrqCheck)
348 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
349 else
350 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
351
352 pNewTb->cUsed += 1;
353 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
354 pVCpu->iem.s.pCurTbR3 = pNewTb;
355 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
356 pVCpu->iem.s.cTbExecNative += 1;
357 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
358 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
359 return (uintptr_t)pNewTb->Native.paInstructions;
360 }
361 }
362 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
363 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
364 }
365 else
366 {
367 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
368 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
369 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
370 }
371 }
372 else
373 {
374 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
375 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
376 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
377 }
378 }
379 else
380 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
381#else
382 NOREF(fFlags);
383 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
384#endif
385
386 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
387 return 0;
388}
389
390
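/*
 * The iemNativeHlpExecRaiseXxx helpers that follow share a common pattern: the
 * iemRaiseXxxJmp worker raises the exception via longjmp (or C++ throw), so
 * control never reaches the return statement.  The dummy
 * 'return VINF_IEM_RAISED_XCPT' only exists to keep non-MSC compilers quiet
 * about a missing return value (presumably MSC would instead complain about
 * the unreachable statement).
 */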
391/**
392 * Used by TB code when it wants to raise a \#DE.
393 */
394IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
395{
396 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
397 iemRaiseDivideErrorJmp(pVCpu);
398#ifndef _MSC_VER
399 return VINF_IEM_RAISED_XCPT; /* not reached */
400#endif
401}
402
403
404/**
405 * Used by TB code when it wants to raise a \#UD.
406 */
407IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
408{
409 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
410 iemRaiseUndefinedOpcodeJmp(pVCpu);
411#ifndef _MSC_VER
412 return VINF_IEM_RAISED_XCPT; /* not reached */
413#endif
414}
415
416
417/**
418 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
419 *
420 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
421 */
422IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
423{
424 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
425 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
426 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
427 iemRaiseUndefinedOpcodeJmp(pVCpu);
428 else
429 iemRaiseDeviceNotAvailableJmp(pVCpu);
430#ifndef _MSC_VER
431 return VINF_IEM_RAISED_XCPT; /* not reached */
432#endif
433}
434
435
436/**
437 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
438 *
439 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
440 */
441IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
442{
443 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
444 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
445 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
446 iemRaiseUndefinedOpcodeJmp(pVCpu);
447 else
448 iemRaiseDeviceNotAvailableJmp(pVCpu);
449#ifndef _MSC_VER
450 return VINF_IEM_RAISED_XCPT; /* not reached */
451#endif
452}
453
454
455/**
456 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
457 *
458 * See IEM_MC_CALL_AVX_XXX/IEM_MC_CALL_SSE_XXX.
459 */
460IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
461{
462 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
463 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
464 iemRaiseSimdFpExceptionJmp(pVCpu);
465 else
466 iemRaiseUndefinedOpcodeJmp(pVCpu);
467#ifndef _MSC_VER
468 return VINF_IEM_RAISED_XCPT; /* not reached */
469#endif
470}
471
472
473/**
474 * Used by TB code when it wants to raise a \#NM.
475 */
476IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
477{
478 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
479 iemRaiseDeviceNotAvailableJmp(pVCpu);
480#ifndef _MSC_VER
481 return VINF_IEM_RAISED_XCPT; /* not reached */
482#endif
483}
484
485
486/**
487 * Used by TB code when it wants to raise a \#GP(0).
488 */
489IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
490{
491 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
492 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
493#ifndef _MSC_VER
494 return VINF_IEM_RAISED_XCPT; /* not reached */
495#endif
496}
497
498
499/**
500 * Used by TB code when it wants to raise a \#MF.
501 */
502IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
503{
504 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
505 iemRaiseMathFaultJmp(pVCpu);
506#ifndef _MSC_VER
507 return VINF_IEM_RAISED_XCPT; /* not reached */
508#endif
509}
510
511
512/**
513 * Used by TB code when it wants to raise a \#XF.
514 */
515IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
516{
517 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
518 iemRaiseSimdFpExceptionJmp(pVCpu);
519#ifndef _MSC_VER
520 return VINF_IEM_RAISED_XCPT; /* not reached */
521#endif
522}
523
524
525/**
526 * Used by TB code when detecting opcode changes.
527 * @see iemThreadeFuncWorkerObsoleteTb
528 */
529IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
530{
531 /* We set fSafeToFree to false because we're being called in the context
532 of a TB callback function, which for native TBs means we cannot release
533 the executable memory until we've returned all the way back to iemTbExec, as
534 that return path goes via the native code generated for the TB. */
535 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
536 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
537 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
538 return VINF_IEM_REEXEC_BREAK;
539}
540
541
542/**
543 * Used by TB code when we need to switch to a TB with CS.LIM checking.
544 */
545IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
546{
547 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
548 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
549 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
550 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
551 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
552 return VINF_IEM_REEXEC_BREAK;
553}
554
555
556/**
557 * Used by TB code when we missed a PC check after a branch.
558 */
559IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
560{
561 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
562 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
563 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
564 pVCpu->iem.s.pbInstrBuf));
565 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
566 return VINF_IEM_REEXEC_BREAK;
567}
568
569
570
571/*********************************************************************************************************************************
572* Helpers: Segmented memory fetches and stores. *
573*********************************************************************************************************************************/
574
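/*
 * A note on the pattern used throughout this section: when the recompiler emits
 * an inline TLB lookup for the access (IEMNATIVE_WITH_TLB_LOOKUP_FETCH, _STORE,
 * _PUSH, _POP), these helpers are only expected to be reached on the TLB-miss /
 * slow path and therefore call the iemMemXxxSafeJmp workers; in the fallback
 * configuration they wrap the regular iemMemXxxJmp workers, which do the TLB
 * lookup themselves.
 */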
575/**
576 * Used by TB code to load unsigned 8-bit data w/ segmentation.
577 */
578IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
579{
580#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
581 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
582#else
583 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
584#endif
585}
586
587
588/**
589 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
590 * to 16 bits.
591 */
592IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
593{
594#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
595 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
596#else
597 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
598#endif
599}
600
601
602/**
603 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
604 * to 32 bits.
605 */
606IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
607{
608#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
609 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
610#else
611 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
612#endif
613}
614
615/**
616 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
617 * to 64 bits.
618 */
619IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
620{
621#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
622 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
623#else
624 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
625#endif
626}
627
628
629/**
630 * Used by TB code to load unsigned 16-bit data w/ segmentation.
631 */
632IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
633{
634#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
635 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
636#else
637 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
638#endif
639}
640
641
642/**
643 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
644 * to 32 bits.
645 */
646IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
647{
648#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
649 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
650#else
651 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
652#endif
653}
654
655
656/**
657 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
658 * to 64 bits.
659 */
660IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
661{
662#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
663 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
664#else
665 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
666#endif
667}
668
669
670/**
671 * Used by TB code to load unsigned 32-bit data w/ segmentation.
672 */
673IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
674{
675#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
676 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
677#else
678 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
679#endif
680}
681
682
683/**
684 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
685 * to 64 bits.
686 */
687IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
688{
689#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
690 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
691#else
692 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
693#endif
694}
695
696
697/**
698 * Used by TB code to load unsigned 64-bit data w/ segmentation.
699 */
700IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
701{
702#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
703 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
704#else
705 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
706#endif
707}
708
709
710/**
711 * Used by TB code to load 128-bit data w/ segmentation.
712 */
713IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
714{
715#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
716 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
717#else
718 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
719#endif
720}
721
722
723/**
724 * Used by TB code to load 128-bit data w/ segmentation, enforcing SSE alignment checks.
725 */
726IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
727{
728#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
729 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
730#else
731 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
732#endif
733}
734
735
736/**
737 * Used by TB code to load 128-bit data w/ segmentation, without alignment checks.
738 */
739IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
740{
741#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
742 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
743#else
744 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
745#endif
746}
747
748
749/**
750 * Used by TB code to load 256-bit data w/ segmentation, without alignment checks.
751 */
752IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
753{
754#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
755 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
756#else
757 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
758#endif
759}
760
761
762/**
763 * Used by TB code to load 256-bit data w/ segmentation, enforcing AVX alignment checks.
764 */
765IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
766{
767#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
768 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
769#else
770 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
771#endif
772}
773
774
775/**
776 * Used by TB code to store unsigned 8-bit data w/ segmentation.
777 */
778IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
779{
780#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
781 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
782#else
783 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
784#endif
785}
786
787
788/**
789 * Used by TB code to store unsigned 16-bit data w/ segmentation.
790 */
791IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
792{
793#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
794 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
795#else
796 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
797#endif
798}
799
800
801/**
802 * Used by TB code to store unsigned 32-bit data w/ segmentation.
803 */
804IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
805{
806#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
807 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
808#else
809 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
810#endif
811}
812
813
814/**
815 * Used by TB code to store unsigned 64-bit data w/ segmentation.
816 */
817IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
818{
819#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
820 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
821#else
822 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
823#endif
824}
825
826
827/**
828 * Used by TB code to store unsigned 128-bit data w/ segmentation, enforcing SSE alignment checks.
829 */
830IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
831{
832#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
833 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
834#else
835 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
836#endif
837}
838
839
840/**
841 * Used by TB code to store unsigned 128-bit data w/ segmentation, without alignment checks.
842 */
843IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
844{
845#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
846 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
847#else
848 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
849#endif
850}
851
852
853/**
854 * Used by TB code to store unsigned 256-bit data w/ segmentation.
855 */
856IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
857{
858#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
859 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
860#else
861 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
862#endif
863}
864
865
866/**
867 * Used by TB code to store unsigned 256-bit data w/ segmentation.
868 */
869IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
870{
871#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
872 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
873#else
874 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
875#endif
876}
877
878
879/**
880 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
881 */
882IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
883{
884#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
885 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
886#else
887 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
888#endif
889}
890
891
892/**
893 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
894 */
895IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
896{
897#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
898 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
899#else
900 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
901#endif
902}
903
904
905/**
906 * Used by TB code to store a 32-bit selector value onto a generic stack.
907 *
908 * Intel CPUs don't write the whole dword, hence the special function.
909 */
910IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
911{
912#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
913 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
914#else
915 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
916#endif
917}
918
919
920/**
921 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
922 */
923IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
924{
925#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
926 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
927#else
928 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
929#endif
930}
931
932
933/**
934 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
935 */
936IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
937{
938#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
939 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
940#else
941 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
942#endif
943}
944
945
946/**
947 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
948 */
949IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
950{
951#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
952 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
953#else
954 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
955#endif
956}
957
958
959/**
960 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
961 */
962IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
963{
964#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
965 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
966#else
967 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
968#endif
969}
970
971
972
973/*********************************************************************************************************************************
974* Helpers: Flat memory fetches and stores. *
975*********************************************************************************************************************************/
976
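/*
 * The flat variants below mirror the segmented helpers above; since no segment
 * register applies, the Safe workers are called with UINT8_MAX as the segment
 * index (IEM's "flat address / no segment" convention) and the non-inline-TLB
 * path uses the iemMemFlatXxxJmp workers directly.
 */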
977/**
978 * Used by TB code to load unsigned 8-bit data w/ flat address.
979 * @note Zero extending the value to 64-bit to simplify assembly.
980 */
981IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
982{
983#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
984 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
985#else
986 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
987#endif
988}
989
990
991/**
992 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
993 * to 16 bits.
994 * @note Zero extending the value to 64-bit to simplify assembly.
995 */
996IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
997{
998#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
999 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1000#else
1001 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1002#endif
1003}
1004
1005
1006/**
1007 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1008 * to 32 bits.
1009 * @note Zero extending the value to 64-bit to simplify assembly.
1010 */
1011IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1012{
1013#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1014 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1015#else
1016 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1017#endif
1018}
1019
1020
1021/**
1022 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1023 * to 64 bits.
1024 */
1025IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1026{
1027#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1028 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1029#else
1030 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1031#endif
1032}
1033
1034
1035/**
1036 * Used by TB code to load unsigned 16-bit data w/ flat address.
1037 * @note Zero extending the value to 64-bit to simplify assembly.
1038 */
1039IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1040{
1041#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1042 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1043#else
1044 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1045#endif
1046}
1047
1048
1049/**
1050 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1051 * to 32 bits.
1052 * @note Zero extending the value to 64-bit to simplify assembly.
1053 */
1054IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1055{
1056#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1057 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1058#else
1059 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1060#endif
1061}
1062
1063
1064/**
1065 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1066 * to 64 bits.
1067 * @note Zero extending the value to 64-bit to simplify assembly.
1068 */
1069IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1070{
1071#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1072 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1073#else
1074 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1075#endif
1076}
1077
1078
1079/**
1080 * Used by TB code to load unsigned 32-bit data w/ flat address.
1081 * @note Zero extending the value to 64-bit to simplify assembly.
1082 */
1083IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1084{
1085#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1086 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1087#else
1088 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1089#endif
1090}
1091
1092
1093/**
1094 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1095 * to 64 bits.
1096 * @note Zero extending the value to 64-bit to simplify assembly.
1097 */
1098IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1099{
1100#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1101 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1102#else
1103 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1104#endif
1105}
1106
1107
1108/**
1109 * Used by TB code to load unsigned 64-bit data w/ flat address.
1110 */
1111IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1112{
1113#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1114 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1115#else
1116 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1117#endif
1118}
1119
1120
1121/**
1122 * Used by TB code to load unsigned 128-bit data w/ flat address.
1123 */
1124IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1125{
1126#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1127 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1128#else
1129 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1130#endif
1131}
1132
1133
1134/**
1135 * Used by TB code to load unsigned 128-bit data w/ flat address, enforcing SSE alignment checks.
1136 */
1137IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1138{
1139#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1140 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1141#else
1142 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1143#endif
1144}
1145
1146
1147/**
1148 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checks.
1149 */
1150IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1151{
1152#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1153 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1154#else
1155 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1156#endif
1157}
1158
1159
1160/**
1161 * Used by TB code to load unsigned 256-bit data w/ flat address, without alignment checks.
1162 */
1163IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1164{
1165#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1166 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1167#else
1168 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1169#endif
1170}
1171
1172
1173/**
1174 * Used by TB code to load unsigned 256-bit data w/ flat address, enforcing AVX alignment checks.
1175 */
1176IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1177{
1178#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1179 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1180#else
1181 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1182#endif
1183}
1184
1185
1186/**
1187 * Used by TB code to store unsigned 8-bit data w/ flat address.
1188 */
1189IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1190{
1191#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1192 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1193#else
1194 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1195#endif
1196}
1197
1198
1199/**
1200 * Used by TB code to store unsigned 16-bit data w/ flat address.
1201 */
1202IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1203{
1204#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1205 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1206#else
1207 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1208#endif
1209}
1210
1211
1212/**
1213 * Used by TB code to store unsigned 32-bit data w/ flat address.
1214 */
1215IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1216{
1217#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1218 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1219#else
1220 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1221#endif
1222}
1223
1224
1225/**
1226 * Used by TB code to store unsigned 64-bit data w/ flat address.
1227 */
1228IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1229{
1230#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1231 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1232#else
1233 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1234#endif
1235}
1236
1237
1238/**
1239 * Used by TB code to store unsigned 128-bit data w/ flat address, enforcing SSE alignment checks.
1240 */
1241IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1242{
1243#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1244 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1245#else
1246 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1247#endif
1248}
1249
1250
1251/**
1252 * Used by TB code to store unsigned 128-bit data w/ flat address, without alignment checks.
1253 */
1254IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1255{
1256#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1257 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1258#else
1259 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1260#endif
1261}
1262
1263
1264/**
1265 * Used by TB code to store unsigned 256-bit data w/ flat address, without alignment checks.
1266 */
1267IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1268{
1269#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1270 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1271#else
1272 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1273#endif
1274}
1275
1276
1277/**
1278 * Used by TB code to store unsigned 256-bit data w/ flat address, enforcing AVX alignment checks.
1279 */
1280IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1281{
1282#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1283 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1284#else
1285 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1286#endif
1287}
1288
1289
1290/**
1291 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1292 */
1293IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1294{
1295#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1296 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1297#else
1298 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1299#endif
1300}
1301
1302
1303/**
1304 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1305 */
1306IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1307{
1308#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1309 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1310#else
1311 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1312#endif
1313}
1314
1315
1316/**
1317 * Used by TB code to store a segment selector value onto a flat stack.
1318 *
1319 * Intel CPUs don't write the whole dword, hence the special function.
1320 */
1321IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1322{
1323#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1324 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1325#else
1326 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1327#endif
1328}
1329
1330
1331/**
1332 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1333 */
1334IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1335{
1336#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1337 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1338#else
1339 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1340#endif
1341}
1342
1343
1344/**
1345 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1346 */
1347IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1348{
1349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1350 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1351#else
1352 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1353#endif
1354}
1355
1356
1357/**
1358 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1359 */
1360IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1361{
1362#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1363 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1364#else
1365 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1366#endif
1367}
1368
1369
1370/**
1371 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1372 */
1373IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1374{
1375#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1376 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1377#else
1378 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1379#endif
1380}
1381
1382
1383
1384/*********************************************************************************************************************************
1385* Helpers: Segmented memory mapping. *
1386*********************************************************************************************************************************/
1387
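/*
 * The mapping helpers below hand back a host pointer to the guest memory and
 * store an unmap token in *pbUnmapInfo, which the generated code later passes
 * to the corresponding commit/unmap helper.  The Atomic/Rw/Wo/Ro suffixes
 * match the access mode requested by the IEM_MC_MEM_MAP_XXX statement being
 * recompiled.
 */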
1388/**
1389 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1390 * segmentation.
1391 */
1392IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1393 RTGCPTR GCPtrMem, uint8_t iSegReg))
1394{
1395#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1396 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1397#else
1398 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1399#endif
1400}
1401
1402
1403/**
1404 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1405 */
1406IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1407 RTGCPTR GCPtrMem, uint8_t iSegReg))
1408{
1409#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1410 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1411#else
1412 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1413#endif
1414}
1415
1416
1417/**
1418 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1419 */
1420IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1421 RTGCPTR GCPtrMem, uint8_t iSegReg))
1422{
1423#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1424 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1425#else
1426 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1427#endif
1428}
1429
1430
1431/**
1432 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1433 */
1434IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1435 RTGCPTR GCPtrMem, uint8_t iSegReg))
1436{
1437#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1438 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1439#else
1440 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1441#endif
1442}
1443
1444
1445/**
1446 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1447 * segmentation.
1448 */
1449IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1450 RTGCPTR GCPtrMem, uint8_t iSegReg))
1451{
1452#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1453 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1454#else
1455 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1456#endif
1457}
1458
1459
1460/**
1461 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1462 */
1463IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1464 RTGCPTR GCPtrMem, uint8_t iSegReg))
1465{
1466#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1467 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1468#else
1469 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1470#endif
1471}
1472
1473
1474/**
1475 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1476 */
1477IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1478 RTGCPTR GCPtrMem, uint8_t iSegReg))
1479{
1480#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1481 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1482#else
1483 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1484#endif
1485}
1486
1487
1488/**
1489 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1490 */
1491IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1492 RTGCPTR GCPtrMem, uint8_t iSegReg))
1493{
1494#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1495 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1496#else
1497 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1498#endif
1499}
1500
1501
1502/**
1503 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1504 * segmentation.
1505 */
1506IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1507 RTGCPTR GCPtrMem, uint8_t iSegReg))
1508{
1509#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1510 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1511#else
1512 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1513#endif
1514}
1515
1516
1517/**
1518 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1519 */
1520IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1521 RTGCPTR GCPtrMem, uint8_t iSegReg))
1522{
1523#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1524 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1525#else
1526 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1527#endif
1528}
1529
1530
1531/**
1532 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1533 */
1534IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1535 RTGCPTR GCPtrMem, uint8_t iSegReg))
1536{
1537#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1538 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1539#else
1540 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1541#endif
1542}
1543
1544
1545/**
1546 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1547 */
1548IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1549 RTGCPTR GCPtrMem, uint8_t iSegReg))
1550{
1551#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1552 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1553#else
1554 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1555#endif
1556}
1557
1558
1559/**
1560 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1561 * segmentation.
1562 */
1563IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1564 RTGCPTR GCPtrMem, uint8_t iSegReg))
1565{
1566#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1567 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1568#else
1569 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1570#endif
1571}
1572
1573
1574/**
1575 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1576 */
1577IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1578 RTGCPTR GCPtrMem, uint8_t iSegReg))
1579{
1580#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1581 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1582#else
1583 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1584#endif
1585}
1586
1587
1588/**
1589 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1590 */
1591IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1592 RTGCPTR GCPtrMem, uint8_t iSegReg))
1593{
1594#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1595 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1596#else
1597 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1598#endif
1599}
1600
1601
1602/**
1603 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1604 */
1605IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1606 RTGCPTR GCPtrMem, uint8_t iSegReg))
1607{
1608#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1609 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1610#else
1611 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1612#endif
1613}
1614
1615
1616/**
1617 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1618 */
1619IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1620 RTGCPTR GCPtrMem, uint8_t iSegReg))
1621{
1622#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1623 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1624#else
1625 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1626#endif
1627}
1628
1629
1630/**
1631 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1632 */
1633IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1634 RTGCPTR GCPtrMem, uint8_t iSegReg))
1635{
1636#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1637 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1638#else
1639 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1640#endif
1641}
1642
1643
1644/**
1645 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1646 * segmentation.
1647 */
1648IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1649 RTGCPTR GCPtrMem, uint8_t iSegReg))
1650{
1651#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1652 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1653#else
1654 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1655#endif
1656}
1657
1658
1659/**
1660 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1661 */
1662IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1663 RTGCPTR GCPtrMem, uint8_t iSegReg))
1664{
1665#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1666 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1667#else
1668 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1669#endif
1670}
1671
1672
1673/**
1674 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1675 */
1676IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1677 RTGCPTR GCPtrMem, uint8_t iSegReg))
1678{
1679#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1680 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1681#else
1682 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1683#endif
1684}
1685
1686
1687/**
1688 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1689 */
1690IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1691 RTGCPTR GCPtrMem, uint8_t iSegReg))
1692{
1693#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1694 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1695#else
1696 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1697#endif
1698}
1699
1700
1701/*********************************************************************************************************************************
1702* Helpers: Flat memory mapping. *
1703*********************************************************************************************************************************/
1704
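/*
 * Note: When IEMNATIVE_WITH_TLB_LOOKUP_MAPPED is defined, the flat helpers
 *       below reuse the segmented "safe" workers and pass UINT8_MAX as the
 *       segment register index to indicate a flat (already linearized)
 *       address; otherwise they call the dedicated flat mapping workers.
 */
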
1705/**
1706 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1707 * address.
1708 */
1709IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1710{
1711#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1712 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1713#else
1714 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1715#endif
1716}
1717
1718
1719/**
1720 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1723{
1724#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1725 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1726#else
1727 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1728#endif
1729}
1730
1731
1732/**
1733 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1734 */
1735IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1736{
1737#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1738 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1739#else
1740 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1741#endif
1742}
1743
1744
1745/**
1746 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1747 */
1748IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1749{
1750#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1751 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1752#else
1753 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1754#endif
1755}
1756
1757
1758/**
1759 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1760 * address.
1761 */
1762IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1763{
1764#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1765 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1766#else
1767 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1768#endif
1769}
1770
1771
1772/**
1773 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1774 */
1775IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1776{
1777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1778 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1779#else
1780 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1781#endif
1782}
1783
1784
1785/**
1786 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1787 */
1788IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1789{
1790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1791 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1792#else
1793 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1794#endif
1795}
1796
1797
1798/**
1799 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1800 */
1801IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1802{
1803#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1804 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1805#else
1806 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1807#endif
1808}
1809
1810
1811/**
1812 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1813 * address.
1814 */
1815IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1816{
1817#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1818 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1819#else
1820 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1821#endif
1822}
1823
1824
1825/**
1826 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1829{
1830#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1831 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1832#else
1833 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1834#endif
1835}
1836
1837
1838/**
1839 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1840 */
1841IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1842{
1843#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1844 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1845#else
1846 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1847#endif
1848}
1849
1850
1851/**
1852 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1853 */
1854IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1855{
1856#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1857 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1858#else
1859 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1860#endif
1861}
1862
1863
1864/**
1865 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1866 * address.
1867 */
1868IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1869{
1870#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1871 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1872#else
1873 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1874#endif
1875}
1876
1877
1878/**
1879 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1880 */
1881IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1882{
1883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1884 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1885#else
1886 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1887#endif
1888}
1889
1890
1891/**
1892 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1895{
1896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1897 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1898#else
1899 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1900#endif
1901}
1902
1903
1904/**
1905 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1906 */
1907IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1908{
1909#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1910 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1911#else
1912 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1913#endif
1914}
1915
1916
1917/**
1918 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1919 */
1920IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1921{
1922#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1923 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1924#else
1925 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1926#endif
1927}
1928
1929
1930/**
1931 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1932 */
1933IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1934{
1935#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1936 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1937#else
1938 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1939#endif
1940}
1941
1942
1943/**
1944 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1945 * address.
1946 */
1947IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1948{
1949#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1950 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1951#else
1952 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1953#endif
1954}
1955
1956
1957/**
1958 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1959 */
1960IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1961{
1962#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1963 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1964#else
1965 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1966#endif
1967}
1968
1969
1970/**
1971 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1974{
1975#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1976 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1977#else
1978 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1979#endif
1980}
1981
1982
1983/**
1984 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1985 */
1986IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1987{
1988#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1989 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1990#else
1991 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1992#endif
1993}
1994
1995
1996/*********************************************************************************************************************************
1997* Helpers: Commit, rollback & unmap *
1998*********************************************************************************************************************************/
1999
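/*
 * Note: The bUnmapInfo byte consumed by these helpers is the value returned
 *       via the pbUnmapInfo output parameter of the mapping helpers above;
 *       each helper simply forwards it to the matching
 *       iemMemCommitAndUnmap*SafeJmp worker.
 */
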
2000/**
2001 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2002 */
2003IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2004{
2005 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2006}
2007
2008
2009/**
2010 * Used by TB code to commit and unmap a read-write memory mapping.
2011 */
2012IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2013{
2014 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2015}
2016
2017
2018/**
2019 * Used by TB code to commit and unmap a write-only memory mapping.
2020 */
2021IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2022{
2023 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2024}
2025
2026
2027/**
2028 * Used by TB code to commit and unmap a read-only memory mapping.
2029 */
2030IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2031{
2032 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2033}
2034
2035
2036/**
2037 * Reinitializes the native recompiler state.
2038 *
2039 * Called before starting a new recompile job.
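 *
 * Only the counters and the register/variable/label bookkeeping are reset
 * here; the buffers allocated by iemNativeInit() are kept and reused.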
2040 */
2041static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2042{
2043 pReNative->cLabels = 0;
2044 pReNative->bmLabelTypes = 0;
2045 pReNative->cFixups = 0;
2046 pReNative->cTbExitFixups = 0;
2047#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2048 pReNative->pDbgInfo->cEntries = 0;
2049 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2050#endif
2051 pReNative->pTbOrg = pTb;
2052 pReNative->cCondDepth = 0;
2053 pReNative->uCondSeqNo = 0;
2054 pReNative->uCheckIrqSeqNo = 0;
2055 pReNative->uTlbSeqNo = 0;
2056#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
2057 pReNative->fSkippingEFlags = 0;
2058#endif
2059#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2060 pReNative->PostponedEfl.fEFlags = 0;
2061 pReNative->PostponedEfl.enmOp = kIemNativePostponedEflOp_Invalid;
2062 pReNative->PostponedEfl.cOpBits = 0;
2063 pReNative->PostponedEfl.idxReg1 = UINT8_MAX;
2064 pReNative->PostponedEfl.idxReg2 = UINT8_MAX;
2065#endif
2066
2067#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2068 pReNative->Core.offPc = 0;
2069# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(VBOX_WITH_STATISTICS)
2070 pReNative->idxInstrPlusOneOfLastPcUpdate = 0;
2071# endif
2072# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2073 pReNative->Core.fDebugPcInitialized = false;
2074# endif
2075#endif
2076 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2077 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2078#if IEMNATIVE_HST_GREG_COUNT < 32
2079 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2080#endif
2081 ;
2082 pReNative->Core.bmHstRegsWithGstShadow = 0;
2083 pReNative->Core.bmGstRegShadows = 0;
2084#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2085 pReNative->Core.bmGstRegShadowDirty = 0;
2086#endif
2087 pReNative->Core.bmVars = 0;
2088 pReNative->Core.bmStack = 0;
2089 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2090 pReNative->Core.u64ArgVars = UINT64_MAX;
2091
2092 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 23);
2093 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2094 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2095 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2096 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2097 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2098 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2099 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2100 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2101 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2102 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2103 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2104 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2105 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2106 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2107 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2108 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2110 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2111 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2112 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2113 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2114 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2115 pReNative->aidxUniqueLabels[22] = UINT32_MAX;
2116
2117 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2118
2119 /* Full host register reinit: */
2120 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2121 {
2122 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2123 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2124 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2125 }
2126
2127 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2128 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2129#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2130 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2131#endif
2132#ifdef IEMNATIVE_REG_FIXED_TMP0
2133 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2134#endif
2135#ifdef IEMNATIVE_REG_FIXED_TMP1
2136 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2137#endif
2138#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2139 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2140#endif
2141 );
2142 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2143 {
2144 fRegs &= ~RT_BIT_32(idxReg);
2145 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2146 }
2147
2148 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2149#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2150 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2151#endif
2152#ifdef IEMNATIVE_REG_FIXED_TMP0
2153 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2154#endif
2155#ifdef IEMNATIVE_REG_FIXED_TMP1
2156 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2157#endif
2158#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2159 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2160#endif
2161
2162 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2163#if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2164 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2165#endif
2166 ;
2167 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2168 pReNative->Core.bmGstSimdRegShadows = 0;
2169 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2170 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2171
2172 /* Full host register reinit: */
2173 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2174 {
2175 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2176 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2177 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2178 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2179 }
2180
2181 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2182 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2183 {
2184 fRegs &= ~RT_BIT_32(idxReg);
2185 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2186 }
2187
2188#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2189 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2190#endif
2191
2192 return pReNative;
2193}
2194
2195
2196/**
2197 * Used when done emitting the per-chunk code and for iemNativeInit bailout.
2198 */
2199static void iemNativeTerm(PIEMRECOMPILERSTATE pReNative)
2200{
2201 RTMemFree(pReNative->pInstrBuf);
2202 RTMemFree(pReNative->paLabels);
2203 RTMemFree(pReNative->paFixups);
2204 RTMemFree(pReNative->paTbExitFixups);
2205#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2206 RTMemFree(pReNative->pDbgInfo);
2207#endif
2208 RTMemFree(pReNative);
2209}
2210
2211
2212/**
2213 * Allocates and initializes the native recompiler state.
2214 *
2215 * This is called the first time an EMT wants to recompile something.
2216 *
2217 * @returns Pointer to the new recompiler state.
2218 * @param pVCpu The cross context virtual CPU structure of the calling
2219 * thread.
2220 * @param pTb The TB that's about to be recompiled. When this is NULL,
2221 * the recompiler state is for emitting the common per-chunk
2222 * code from iemNativeRecompileAttachExecMemChunkCtx.
2223 * @thread EMT(pVCpu)
2224 */
2225static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2226{
2227 VMCPU_ASSERT_EMT(pVCpu);
2228
2229 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2230 AssertReturn(pReNative, NULL);
2231
2232 /*
2233 * Try to allocate all the buffers and stuff we need.
2234 */
2235 uint32_t const cFactor = pTb ? 1 : 32 /* per-chunk stuff doesn't really need anything but the code buffer */;
2236 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2237 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K / cFactor);
2238 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K / cFactor);
2239 pReNative->paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemAllocZ(sizeof(IEMNATIVEEXITFIXUP) * _8K / cFactor);
2240#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2241 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K / cFactor]));
2242#endif
2243 if (RT_LIKELY( pReNative->pInstrBuf
2244 && pReNative->paLabels
2245 && pReNative->paFixups
2246 && pReNative->paTbExitFixups)
2247#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2248 && pReNative->pDbgInfo
2249#endif
2250 )
2251 {
2252 /*
2253 * Set the buffer & array sizes on success.
2254 */
2255 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2256 pReNative->cLabelsAlloc = _8K / cFactor;
2257 pReNative->cFixupsAlloc = _16K / cFactor;
2258 pReNative->cTbExitFixupsAlloc = _8K / cFactor;
2259#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2260 pReNative->cDbgInfoAlloc = _16K / cFactor;
2261#endif
2262
2263 /* Other constant stuff: */
2264 pReNative->pVCpu = pVCpu;
2265
2266 /*
2267 * Done, just reinit it.
2268 */
2269 return iemNativeReInit(pReNative, pTb);
2270 }
2271
2272 /*
2273 * Failed. Cleanup and return.
2274 */
2275 AssertFailed();
2276 iemNativeTerm(pReNative);
2277 return NULL;
2278}
2279
2280
2281/**
2282 * Creates a label.
2283 *
2284 * If the label does not yet have a defined position,
2285 * call iemNativeLabelDefine() later to set it.
2286 *
2287 * @returns Label ID. Throws VBox status code on failure, so no need to check
2288 * the return value.
2289 * @param pReNative The native recompile state.
2290 * @param enmType The label type.
2291 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2292 * label is not yet defined (default).
2293 * @param uData Data associated with the label. Only applicable to
2294 * certain types of labels. Default is zero.
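 *
 * A minimal usage sketch (illustrative only; the label type, the offsets and
 * the surrounding emitter calls are placeholders):
 * @code
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_LoopJumpTarget,
 *                                                    UINT32_MAX, 0);
 *     // ... emit a branch and add a fixup referencing idxLabel ...
 *     iemNativeLabelDefine(pReNative, idxLabel, off);   // resolves the forward declaration
 * @endcode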
2295 */
2296DECL_HIDDEN_THROW(uint32_t)
2297iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2298 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2299{
2300 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2301#if defined(RT_ARCH_AMD64)
2302 Assert(enmType >= kIemNativeLabelType_LoopJumpTarget);
2303#endif
2304
2305 /*
2306 * Locate existing label definition.
2307 *
2308 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2309 * and uData is zero.
2310 */
2311 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2312 uint32_t const cLabels = pReNative->cLabels;
2313 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2314#ifndef VBOX_STRICT
2315 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2316 && offWhere == UINT32_MAX
2317 && uData == 0
2318#endif
2319 )
2320 {
2321#ifndef VBOX_STRICT
2322 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2323 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2324 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2325 if (idxLabel < pReNative->cLabels)
2326 return idxLabel;
2327#else
2328 for (uint32_t i = 0; i < cLabels; i++)
2329 if ( paLabels[i].enmType == enmType
2330 && paLabels[i].uData == uData)
2331 {
2332 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2333 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2334 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2335 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2336 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2337 return i;
2338 }
2339 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2340 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2341#endif
2342 }
2343
2344 /*
2345 * Make sure we've got room for another label.
2346 */
2347 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2348 { /* likely */ }
2349 else
2350 {
2351 uint32_t cNew = pReNative->cLabelsAlloc;
2352 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2353 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2354 cNew *= 2;
2355 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2356 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2357 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2358 pReNative->paLabels = paLabels;
2359 pReNative->cLabelsAlloc = cNew;
2360 }
2361
2362 /*
2363 * Define a new label.
2364 */
2365 paLabels[cLabels].off = offWhere;
2366 paLabels[cLabels].enmType = enmType;
2367 paLabels[cLabels].uData = uData;
2368 pReNative->cLabels = cLabels + 1;
2369
2370 Assert((unsigned)enmType < 64);
2371 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2372
2373 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2374 {
2375 Assert(uData == 0);
2376 pReNative->aidxUniqueLabels[enmType] = cLabels;
2377 }
2378
2379 if (offWhere != UINT32_MAX)
2380 {
2381#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2382 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2383 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2384#endif
2385 }
2386 return cLabels;
2387}
2388
2389
2390/**
2391 * Defines the location of an existing label.
2392 *
2393 * @param pReNative The native recompile state.
2394 * @param idxLabel The label to define.
2395 * @param offWhere The position.
2396 */
2397DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2398{
2399 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2400 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2401 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2402 pLabel->off = offWhere;
2403#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2404 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2405 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2406#endif
2407}
2408
2409
2410/**
2411 * Looks up a label.
2412 *
2413 * @returns Label ID if found, UINT32_MAX if not.
2414 */
2415DECLHIDDEN(uint32_t) iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2416 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
2417{
2418 Assert((unsigned)enmType < 64);
2419 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2420 {
2421 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2422 return pReNative->aidxUniqueLabels[enmType];
2423
2424 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2425 uint32_t const cLabels = pReNative->cLabels;
2426 for (uint32_t i = 0; i < cLabels; i++)
2427 if ( paLabels[i].enmType == enmType
2428 && paLabels[i].uData == uData
2429 && ( paLabels[i].off == offWhere
2430 || offWhere == UINT32_MAX
2431 || paLabels[i].off == UINT32_MAX))
2432 return i;
2433 }
2434 return UINT32_MAX;
2435}
2436
2437
2438/**
2439 * Adds a fixup.
2440 *
2441 * @throws VBox status code (int) on failure.
2442 * @param pReNative The native recompile state.
2443 * @param offWhere The instruction offset of the fixup location.
2444 * @param idxLabel The target label ID for the fixup.
2445 * @param enmType The fixup type.
2446 * @param offAddend Fixup addend if applicable to the type. Default is 0.
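 *
 * Illustrative call (sketch only; offBranch stands for the offset of a
 * previously emitted branch instruction, and the fixup type shown is merely
 * the ARM64 example used elsewhere in this file):
 * @code
 *     iemNativeAddFixup(pReNative, offBranch, idxLabel, kIemNativeFixupType_RelImm14At5);
 * @endcode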
2447 */
2448DECL_HIDDEN_THROW(void)
2449iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2450 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2451{
2452 Assert(idxLabel <= UINT16_MAX);
2453 Assert((unsigned)enmType <= UINT8_MAX);
2454#ifdef RT_ARCH_ARM64
2455 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2456 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2457 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2458#endif
2459
2460 /*
2461 * Make sure we've room.
2462 */
2463 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2464 uint32_t const cFixups = pReNative->cFixups;
2465 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2466 { /* likely */ }
2467 else
2468 {
2469 uint32_t cNew = pReNative->cFixupsAlloc;
2470 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2471 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2472 cNew *= 2;
2473 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2474 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2475 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2476 pReNative->paFixups = paFixups;
2477 pReNative->cFixupsAlloc = cNew;
2478 }
2479
2480 /*
2481 * Add the fixup.
2482 */
2483 paFixups[cFixups].off = offWhere;
2484 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2485 paFixups[cFixups].enmType = enmType;
2486 paFixups[cFixups].offAddend = offAddend;
2487 pReNative->cFixups = cFixups + 1;
2488}
2489
2490
2491/**
2492 * Adds a fixup to the per chunk tail code.
2493 *
2494 * @throws VBox status code (int) on failure.
2495 * @param pReNative The native recompile state.
2496 * @param offWhere The instruction offset of the fixup location.
2497 * @param enmExitReason The exit reason to jump to.
2498 */
2499DECL_HIDDEN_THROW(void)
2500iemNativeAddTbExitFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, IEMNATIVELABELTYPE enmExitReason)
2501{
2502 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
2503
2504 /*
2505 * Make sure we've room.
2506 */
2507 PIEMNATIVEEXITFIXUP paTbExitFixups = pReNative->paTbExitFixups;
2508 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
2509 if (RT_LIKELY(cTbExitFixups < pReNative->cTbExitFixupsAlloc))
2510 { /* likely */ }
2511 else
2512 {
2513 uint32_t cNew = pReNative->cTbExitFixupsAlloc;
2514 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2515 AssertStmt(cTbExitFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2516 cNew *= 2;
2517 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2518 paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemRealloc(paTbExitFixups, cNew * sizeof(paTbExitFixups[0]));
2519 AssertStmt(paTbExitFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2520 pReNative->paTbExitFixups = paTbExitFixups;
2521 pReNative->cTbExitFixupsAlloc = cNew;
2522 }
2523
2524 /*
2525 * Add the fixup.
2526 */
2527 paTbExitFixups[cTbExitFixups].off = offWhere;
2528 paTbExitFixups[cTbExitFixups].enmExitReason = enmExitReason;
2529 pReNative->cTbExitFixups = cTbExitFixups + 1;
2530}
2531
2532
2533/**
2534 * Slow code path for iemNativeInstrBufEnsure.
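 *
 * Doubles the allocation until off + cInstrReq fits, capped at 1 MiB of code
 * on ARM64 (branch range limit) and 2 MiB elsewhere.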
2535 */
2536DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2537{
2538 /* Double the buffer size till we meet the request. */
2539 uint32_t cNew = pReNative->cInstrBufAlloc;
2540 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2541 do
2542 cNew *= 2;
2543 while (cNew < off + cInstrReq);
2544
2545 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2546#ifdef RT_ARCH_ARM64
2547 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2548#else
2549 uint32_t const cbMaxInstrBuf = _2M;
2550#endif
2551 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2552
2553 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2554 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2555
2556#ifdef VBOX_STRICT
2557 pReNative->offInstrBufChecked = off + cInstrReq;
2558#endif
2559 pReNative->cInstrBufAlloc = cNew;
2560 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2561}
2562
2563#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2564
2565/**
2566 * Grows the static debug info array used during recompilation.
2567 *
2568 * @returns Pointer to the new debug info block; throws VBox status code on
2569 * failure, so no need to check the return value.
2570 */
2571DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2572{
2573 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2574 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2575 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2576 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2577 pReNative->pDbgInfo = pDbgInfo;
2578 pReNative->cDbgInfoAlloc = cNew;
2579 return pDbgInfo;
2580}
2581
2582
2583/**
2584 * Adds a new, uninitialized debug info entry, returning a pointer to it.
2585 */
2586DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2587{
2588 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2589 { /* likely */ }
2590 else
2591 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2592 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2593}
2594
2595
2596/**
2597 * Debug Info: Adds a native offset record, if necessary.
2598 */
2599DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2600{
2601 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2602
2603 /*
2604 * Do we need this one?
2605 */
2606 uint32_t const offPrev = pDbgInfo->offNativeLast;
2607 if (offPrev == off)
2608 return;
2609 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2610
2611 /*
2612 * Add it.
2613 */
2614 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2615 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2616 pEntry->NativeOffset.offNative = off;
2617 pDbgInfo->offNativeLast = off;
2618}
2619
2620
2621/**
2622 * Debug Info: Record info about a label.
2623 */
2624static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2625{
2626 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2627 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2628 pEntry->Label.uUnused = 0;
2629 pEntry->Label.enmLabel = (uint8_t)enmType;
2630 pEntry->Label.uData = uData;
2631}
2632
2633
2634/**
2635 * Debug Info: Record info about a threaded call.
2636 */
2637static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2638{
2639 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2640 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2641 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2642 pEntry->ThreadedCall.uUnused = 0;
2643 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2644}
2645
2646
2647/**
2648 * Debug Info: Record info about a new guest instruction.
2649 */
2650static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2651{
2652 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2653 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2654 pEntry->GuestInstruction.uUnused = 0;
2655 pEntry->GuestInstruction.fExec = fExec;
2656}
2657
2658
2659/**
2660 * Debug Info: Record info about guest register shadowing.
2661 */
2662DECL_HIDDEN_THROW(void)
2663iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2664 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2665{
2666 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2667 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2668 pEntry->GuestRegShadowing.uUnused = 0;
2669 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2670 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2671 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2672# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2673 Assert( idxHstReg != UINT8_MAX
2674 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2675# endif
2676}
2677
2678
2679/**
2680 * Debug Info: Record info about guest SIMD register shadowing.
2681 */
2682DECL_HIDDEN_THROW(void)
2683iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2684 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2685{
2686 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2687 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2688 pEntry->GuestSimdRegShadowing.uUnused = 0;
2689 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2690 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2691 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2692}
2693
2694
2695# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2696/**
2697 * Debug Info: Record info about delayed RIP updates.
2698 */
2699DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint64_t offPc, uint32_t cInstrSkipped)
2700{
2701 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2702 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2703 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2704 pEntry->DelayedPcUpdate.offPc = offPc; /** @todo support larger values */
2705}
2706# endif
2707
2708
2709/**
2710 * Debug Info: Record info about a dirty guest register.
2711 */
2712DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2713 uint8_t idxGstReg, uint8_t idxHstReg)
2714{
2715 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2716 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2717 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2718 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2719 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2720}
2721
2722
2723/**
2724 * Debug Info: Record info about a dirty guest register writeback operation.
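 *
 * The 64-bit register mask is recorded as up to three entries of at most
 * 25 bits each, with cShift (0, 1 or 2) selecting which 25-bit block an
 * entry covers.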
2725 */
2726DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2727{
2728 unsigned const cBitsGstRegMask = 25;
2729 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2730
2731 /* The first block of 25 bits: */
2732 if (fGstReg & fGstRegMask)
2733 {
2734 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2735 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2736 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2737 pEntry->GuestRegWriteback.cShift = 0;
2738 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2739 fGstReg &= ~(uint64_t)fGstRegMask;
2740 if (!fGstReg)
2741 return;
2742 }
2743
2744 /* The second block of 25 bits: */
2745 fGstReg >>= cBitsGstRegMask;
2746 if (fGstReg & fGstRegMask)
2747 {
2748 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2749 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2750 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2751 pEntry->GuestRegWriteback.cShift = 1;
2752 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2753 fGstReg &= ~(uint64_t)fGstRegMask;
2754 if (!fGstReg)
2755 return;
2756 }
2757
2758 /* The last block with 14 bits: */
2759 fGstReg >>= cBitsGstRegMask;
2760 Assert(fGstReg & fGstRegMask);
2761 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2762 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2763 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2764 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2765 pEntry->GuestRegWriteback.cShift = 2;
2766 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2767}
2768
2769
2770# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2771/**
2772 * Debug Info: Record info about emitting a postponed EFLAGS calculation.
2773 */
2774DECL_HIDDEN_THROW(void)
2775iemNativeDbgInfoAddPostponedEFlagsCalc(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVE_POSTPONED_EFL_OP_T enmOp,
2776 uint8_t cOpBits, uint8_t idxEmit)
2777{
2778 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2779 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2780 pEntry->PostponedEflCalc.uType = kIemTbDbgEntryType_PostponedEFlagsCalc;
2781 pEntry->PostponedEflCalc.enmOp = (unsigned)enmOp;
2782 pEntry->PostponedEflCalc.cOpBits = cOpBits;
2783 pEntry->PostponedEflCalc.idxEmit = idxEmit;
2784 pEntry->PostponedEflCalc.uUnused = 0;
2785}
2786# endif /* IEMNATIVE_WITH_EFLAGS_POSTPONING */
2787
2788#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2789
2790
2791/*********************************************************************************************************************************
2792* Register Allocator *
2793*********************************************************************************************************************************/
2794
2795/**
2796 * Register parameter indexes (indexed by argument number).
2797 */
2798DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2799{
2800 IEMNATIVE_CALL_ARG0_GREG,
2801 IEMNATIVE_CALL_ARG1_GREG,
2802 IEMNATIVE_CALL_ARG2_GREG,
2803 IEMNATIVE_CALL_ARG3_GREG,
2804#if defined(IEMNATIVE_CALL_ARG4_GREG)
2805 IEMNATIVE_CALL_ARG4_GREG,
2806# if defined(IEMNATIVE_CALL_ARG5_GREG)
2807 IEMNATIVE_CALL_ARG5_GREG,
2808# if defined(IEMNATIVE_CALL_ARG6_GREG)
2809 IEMNATIVE_CALL_ARG6_GREG,
2810# if defined(IEMNATIVE_CALL_ARG7_GREG)
2811 IEMNATIVE_CALL_ARG7_GREG,
2812# endif
2813# endif
2814# endif
2815#endif
2816};
2817AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2818
2819/**
2820 * Call register masks indexed by argument count.
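 *
 * For example, g_afIemNativeCallRegs[3] is the mask covering the first three
 * argument registers (ARG0 thru ARG2).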
2821 */
2822DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2823{
2824 0,
2825 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2826 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2827 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2828 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2829 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2830#if defined(IEMNATIVE_CALL_ARG4_GREG)
2831 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2832 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2833# if defined(IEMNATIVE_CALL_ARG5_GREG)
2834 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2835 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2836# if defined(IEMNATIVE_CALL_ARG6_GREG)
2837 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2838 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2839 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2840# if defined(IEMNATIVE_CALL_ARG7_GREG)
2841 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2842 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2843 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2844# endif
2845# endif
2846# endif
2847#endif
2848};
2849
2850#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2851/**
2852 * BP offset of the stack argument slots.
2853 *
2854 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2855 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2856 */
2857DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2858{
2859 IEMNATIVE_FP_OFF_STACK_ARG0,
2860# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2861 IEMNATIVE_FP_OFF_STACK_ARG1,
2862# endif
2863# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2864 IEMNATIVE_FP_OFF_STACK_ARG2,
2865# endif
2866# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2867 IEMNATIVE_FP_OFF_STACK_ARG3,
2868# endif
2869};
2870AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2871#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2872
2873/**
2874 * Info about shadowed guest register values.
2875 * @see IEMNATIVEGSTREG
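 * @note  Entries with an UINT32_MAX offset and zero size (the efl.* sub-flag
 *        entries) have no dedicated CPUMCTX field of their own; they only
 *        provide names for the individually shadowed EFLAGS bits.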
2876 */
2877DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2878{
2879#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2880 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2881 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2882 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2883 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2884 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2885 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2886 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2887 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2888 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2889 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2890 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2891 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2892 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2893 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2894 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2895 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2896 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2897 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2898 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2899 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2900 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2901 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2902 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2903 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2904 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2905 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2906 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2907 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2908 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2909 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2910 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2911 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2912 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2913 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2914 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2915 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2916 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2917 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2918 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2919 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2920 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2921 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2922 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2923 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2924 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2925 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2926 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2927 /* [kIemNativeGstReg_EFlags.Cf] = */ { UINT32_MAX, 0, "efl.cf", },
2928 /* [kIemNativeGstReg_EFlags.Pf] = */ { UINT32_MAX, 0, "efl.pf", },
2929 /* [kIemNativeGstReg_EFlags.Af] = */ { UINT32_MAX, 0, "efl.af", },
2930 /* [kIemNativeGstReg_EFlags.Zf] = */ { UINT32_MAX, 0, "efl.zf", },
2931 /* [kIemNativeGstReg_EFlags.Sf] = */ { UINT32_MAX, 0, "efl.sf", },
2932 /* [kIemNativeGstReg_EFlags.Of] = */ { UINT32_MAX, 0, "efl.of", },
2933 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2934#undef CPUMCTX_OFF_AND_SIZE
2935};
2936AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2937
2938
2939/** Host CPU general purpose register names. */
2940DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2941{
2942#ifdef RT_ARCH_AMD64
2943 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2944#elif defined(RT_ARCH_ARM64)
2945 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2946 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2947#else
2948# error "port me"
2949#endif
2950};
2951
2952
2953#if 0 /* unused */
2954/**
2955 * Tries to locate a suitable register in the given register mask.
2956 *
2957 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2958 * failed.
2959 *
2960 * @returns Host register number on success, returns UINT8_MAX on failure.
2961 */
2962static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2963{
2964 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2965 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2966 if (fRegs)
2967 {
2968 /** @todo pick better here: */
2969 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2970
2971 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2972 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2973 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2974 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2975
2976 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2977 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2978 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2979 return idxReg;
2980 }
2981 return UINT8_MAX;
2982}
2983#endif /* unused */
2984
2985#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2986
2987/**
2988 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2989 *
2990 * @returns New code buffer offset on success, UINT32_MAX on failure.
2991 * @param pReNative The native recompile state.
2992 * @param off The current code buffer position.
2993 * @param enmGstReg The guest register to store to.
2994 * @param idxHstReg The host register to store from.
2995 */
2996DECL_FORCE_INLINE_THROW(uint32_t)
2997iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
2998{
2999 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3000 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3001
3002 switch (g_aGstShadowInfo[enmGstReg].cb)
3003 {
3004 case sizeof(uint64_t):
3005 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3006 case sizeof(uint32_t):
3007 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3008 case sizeof(uint16_t):
3009 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3010# if 0 /* not present in the table. */
3011 case sizeof(uint8_t):
3012 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3013# endif
3014 default:
3015 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3016 }
3017}
3018
3019
3020/**
3021 * Emits code to flush a pending write of the given guest register,
3022 * version with alternative core state.
3023 *
3024 * @returns New code buffer offset.
3025 * @param pReNative The native recompile state.
3026 * @param off Current code buffer position.
3027 * @param pCore Alternative core state.
3028 * @param enmGstReg The guest register to flush.
3029 */
3030DECL_HIDDEN_THROW(uint32_t)
3031iemNativeRegFlushPendingWriteEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVECORESTATE pCore, IEMNATIVEGSTREG enmGstReg)
3032{
3033 uint8_t const idxHstReg = pCore->aidxGstRegShadows[enmGstReg];
3034
3035 AssertCompile(kIemNativeGstReg_GprFirst == 0);
3036 Assert( enmGstReg <= kIemNativeGstReg_GprLast
3037 || enmGstReg == kIemNativeGstReg_MxCsr);
3038 Assert( idxHstReg != UINT8_MAX
3039 && pCore->bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3040 Log12(("iemNativeRegFlushPendingWriteEx: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3041 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3042
3043 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3044
3045 pCore->bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3046 return off;
3047}
3048
3049
3050/**
3051 * Emits code to flush a pending write of the given guest register.
3052 *
3053 * @returns New code buffer offset.
3054 * @param pReNative The native recompile state.
3055 * @param off Current code buffer position.
3056 * @param enmGstReg The guest register to flush.
3057 */
3058DECL_HIDDEN_THROW(uint32_t)
3059iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
3060{
3061 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3062
3063 AssertCompile(kIemNativeGstReg_GprFirst == 0);
3064 Assert( enmGstReg <= kIemNativeGstReg_GprLast
3065 || enmGstReg == kIemNativeGstReg_MxCsr);
3066 Assert( idxHstReg != UINT8_MAX
3067 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3068 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3069 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3070
3071 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3072
3073 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3074 return off;
3075}
3076
3077
3078/**
3079 * Flush the given set of guest registers if marked as dirty.
3080 *
3081 * @returns New code buffer offset.
3082 * @param pReNative The native recompile state.
3083 * @param off Current code buffer position.
3084 * @param fFlushGstReg The guest register set to flush (default is flush everything).
3085 * @note Must not modify the host status flags!
3086 */
3087DECL_HIDDEN_THROW(uint32_t)
3088iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
3089{
3090 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
3091 if (bmGstRegShadowDirty)
3092 {
3093# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3094 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3095 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
3096# endif
3097 do
3098 {
3099 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3100 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3101 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3102 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3103 } while (bmGstRegShadowDirty);
3104 }
3105
3106 return off;
3107}
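/*
 * Editorial usage sketch, not part of the original source: flushing a subset of dirty
 * shadowed guest registers before emitting code that expects CPUMCTX to be up to date.
 * The emitter context (pReNative, off) is assumed, the RAX/RCX mask is arbitrary, and the
 * guest GPR enum entries are assumed to follow x86 register order from kIemNativeGstReg_GprFirst.
 */
#if 0 /* illustration only */
    /* Write back the guest RAX and RCX shadow copies if dirty; other pending writes stay postponed. */
    off = iemNativeRegFlushDirtyGuest(pReNative, off,
                                        RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xAX)
                                      | RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xCX));
#endif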
3108
3109
3110/**
3111 * Flush all shadowed guest registers marked as dirty for the given host register.
3112 *
3113 * @returns New code buffer offset.
3114 * @param pReNative The native recompile state.
3115 * @param off Current code buffer position.
3116 * @param idxHstReg The host register.
3117 *
3118 * @note This doesn't do any unshadowing of guest registers from the host register.
3119 *
3120 * @note Must not modify the host status flags!
3121 */
3122DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3123{
3124 /* We need to flush any pending guest register writes this host register shadows. */
3125 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3126 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3127 {
3128# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3129 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3130 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3131# endif
3132 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3133 do
3134 {
3135 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3136 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3137 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3138 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3139 } while (bmGstRegShadowDirty);
3140 }
3141
3142 return off;
3143}
3144
3145#endif /* IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK */
3146
3147
3148/**
3149 * Locate a register, possibly freeing one up.
3150 *
3151 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3152 * failed.
3153 *
3154 * @returns Host register number on success. Returns UINT8_MAX if no registers were
3155 * found; the caller is supposed to deal with this and raise an
3156 * allocation type specific status code (if desired).
3157 *
3158 * @throws VBox status code if we run into trouble spilling a variable or
3159 * recording debug info. Does NOT throw anything if we're out of
3160 * registers, though.
3161 *
3162 * @note Must not modify the host status flags!
3163 */
3164static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3165 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3166{
3167 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3168 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3169 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3170
3171 /*
3172 * Try a freed register that's shadowing a guest register.
3173 */
3174 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3175 if (fRegs)
3176 {
3177 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3178
3179#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3180 /*
3181 * When we have liveness information, we use it to kick out all shadowed
3182 * guest registers that will not be needed any more in this TB. If we're
3183 * lucky, this may prevent us from ending up here again.
3184 *
3185 * Note! We must consider the previous entry here so we don't free
3186 * anything that the current threaded function requires (current
3187 * entry is produced by the next threaded function).
3188 */
3189 uint32_t const idxCurCall = pReNative->idxCurCall;
3190 if (idxCurCall > 0)
3191 {
3192 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3193 uint64_t fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
3194
3195 /* Merge EFLAGS. */
3196 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3197 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3198 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3199 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3200 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
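            /* Editorial worked example, not in the original source: with the liveness bits
               ordered Other, CF, PF, AF, ZF, SF, OF from kIemNativeGstReg_EFlags upwards,
               the three AND-with-shift steps above fold all seven bits onto the EFlags/Other
               position, so that bit survives in fTmp only if every one of the seven flags is
               freeable. E.g. if SF may be freed but CF may not, the folded bit ends up clear
               and the shadowed eflags copy is kept. */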
3201
3202 /* If it matches any shadowed registers. */
3203 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3204 {
3205#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3206 /* Writeback any dirty shadow registers we are about to unshadow. */
3207 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3208#endif
3209
3210 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3211 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3212 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3213
3214 /* See if we've got any unshadowed registers we can return now. */
3215 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3216 if (fUnshadowedRegs)
3217 {
3218 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3219 return (fPreferVolatile
3220 ? ASMBitFirstSetU32(fUnshadowedRegs)
3221 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3222 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3223 - 1;
3224 }
3225 }
3226 }
3227#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3228
3229 unsigned const idxReg = (fPreferVolatile
3230 ? ASMBitFirstSetU32(fRegs)
3231 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3232 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3233 - 1;
3234
3235 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3236 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3237 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3238 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3239
3240#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3241 /* We need to flush any pending guest register writes this host register shadows. */
3242 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3243#endif
3244
3245 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3246 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3247 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3248 return idxReg;
3249 }
3250
3251 /*
3252 * Try free up a variable that's in a register.
3253 *
3254 * We do two rounds here, first evacuating variables that don't need to be
3255 * saved on the stack, then in the second round moving things to the stack.
3256 */
3257 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3258 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3259 {
3260 uint32_t fVars = pReNative->Core.bmVars;
3261 while (fVars)
3262 {
3263 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3264 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* (this is the GPR allocator) */
3265 {
3266 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3267 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3268 && (RT_BIT_32(idxReg) & fRegMask)
3269 && ( iLoop == 0
3270 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3271 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3272 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3273 {
3274 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3275 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3276 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3277 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3278 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3279 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3280#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3281 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3282#endif
3283
3284 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3285 {
3286 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3287 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3288 }
3289
3290 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3291 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3292
3293 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3294 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3295 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3296 return idxReg;
3297 }
3298 }
3299 fVars &= ~RT_BIT_32(idxVar);
3300 }
3301 }
3302
3303 return UINT8_MAX;
3304}
3305
3306
3307/**
3308 * Reassigns a variable to a different register specified by the caller.
3309 *
3310 * @returns The new code buffer position.
3311 * @param pReNative The native recompile state.
3312 * @param off The current code buffer position.
3313 * @param idxVar The variable index.
3314 * @param idxRegOld The old host register number.
3315 * @param idxRegNew The new host register number.
3316 * @param pszCaller The caller for logging.
3317 */
3318static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3319 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3320{
3321 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3322 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3323 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3324 RT_NOREF(pszCaller);
3325
3326#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3327 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3328#endif
3329 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3330
3331 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3332#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3333 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3334#endif
3335 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3336 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3337 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3338
3339 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3340 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3341 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3342 if (fGstRegShadows)
3343 {
3344 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3345 | RT_BIT_32(idxRegNew);
3346 while (fGstRegShadows)
3347 {
3348 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3349 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3350
3351 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3352 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3353 }
3354 }
3355
3356 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3357 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3358 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3359 return off;
3360}
3361
3362
3363/**
3364 * Moves a variable to a different register or spills it onto the stack.
3365 *
3366 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3367 * kinds can easily be recreated if needed later.
3368 *
3369 * @returns The new code buffer position.
3370 * @param pReNative The native recompile state.
3371 * @param off The current code buffer position.
3372 * @param idxVar The variable index.
3373 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3374 * call-volatile registers.
3375 */
3376DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3377 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3378{
3379 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3380 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3381 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3382 Assert(!pVar->fRegAcquired);
3383
3384 uint8_t const idxRegOld = pVar->idxReg;
3385 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3386 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3387 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3388 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3389 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3390 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3391 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3392 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3393#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3394 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3395#endif
3396
3397
3398 /** @todo Add statistics on this.*/
3399 /** @todo Implement basic variable liveness analysis (python) so variables
3400 * can be freed immediately once no longer used. Without it we risk
3401 * trashing registers and stack slots on dead variables.
3402 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3403
3404 /*
3405 * First try move it to a different register, as that's cheaper.
3406 */
3407 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3408 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3409 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3410 if (fRegs)
3411 {
3412 /* Avoid using shadow registers, if possible. */
3413 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3414 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3415 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3416 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3417 }
3418
3419 /*
3420 * Otherwise we must spill the register onto the stack.
3421 */
3422 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3423 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3424 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3425 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3426
3427 pVar->idxReg = UINT8_MAX;
3428 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3429 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3430 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3431 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3432 return off;
3433}
3434
3435
3436/**
3437 * Allocates a temporary host general purpose register.
3438 *
3439 * This may emit code to save register content onto the stack in order to free
3440 * up a register.
3441 *
3442 * @returns The host register number; throws VBox status code on failure,
3443 * so no need to check the return value.
3444 * @param pReNative The native recompile state.
3445 * @param poff Pointer to the variable with the code buffer
3446 * position. This will be updated if we need to move
3447 * a variable from register to stack in order to
3448 * satisfy the request.
3449 * @tparam a_fPreferVolatile Whether to prefer volatile over non-volatile
3450 * registers (@c true, default) or the other way
3451 * around (@c false, for
3452 * iemNativeRegAllocTmpForGuestReg()).
3453 *
3454 * @note Must not modify the host status flags!
3455 */
3456template<bool const a_fPreferVolatile>
3457DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3458{
3459 /*
3460 * Try find a completely unused register, preferably a call-volatile one.
3461 */
3462 uint8_t idxReg;
3463 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3464 & ~pReNative->Core.bmHstRegsWithGstShadow
3465 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3466 if (fRegs)
3467 {
3468 if (a_fPreferVolatile)
3469 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3470 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3471 else
3472 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3473 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3474 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3475 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3476 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3477 }
3478 else
3479 {
3480 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile);
3481 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3482 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3483 }
3484 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3485}
3486
3487
3488/** See iemNativeRegAllocTmpInt for details. */
3489DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3490{
3491 return iemNativeRegAllocTmpInt<true>(pReNative, poff);
3492}
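/*
 * Editorial usage sketch, not part of the original source: the typical allocate / emit /
 * free pattern for a scratch host register inside an emitter. The load-immediate emit in
 * the middle stands in for whatever actually uses the register.
 */
#if 0 /* illustration only */
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1234));
    /* ... emit code consuming idxTmpReg ... */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
#endif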
3493
3494
3495/** See iemNativeRegAllocTmpInt for details. */
3496DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpPreferNonVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3497{
3498 return iemNativeRegAllocTmpInt<false>(pReNative, poff);
3499}
3500
3501
3502/**
3503 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3504 * registers.
3505 *
3506 * @returns The host register number; throws VBox status code on failure,
3507 * so no need to check the return value.
3508 * @param pReNative The native recompile state.
3509 * @param poff Pointer to the variable with the code buffer
3510 * position. This will be updated if we need to move
3511 * a variable from register to stack in order to
3512 * satisfy the request.
3513 * @param fRegMask Mask of acceptable registers.
3514 * @tparam a_fPreferVolatile Whether to prefer volatile over non-volatile
3515 * registers (@c true, default) or the other way
3516 * around (@c false, for
3517 * iemNativeRegAllocTmpForGuestReg()).
3518 */
3519template<bool const a_fPreferVolatile>
3520DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpExInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3521{
3522 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3523 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3524
3525 /*
3526 * Try find a completely unused register, preferably a call-volatile one.
3527 */
3528 uint8_t idxReg;
3529 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3530 & ~pReNative->Core.bmHstRegsWithGstShadow
3531 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3532 & fRegMask;
3533 if (fRegs)
3534 {
3535 if RT_CONSTEXPR_IF(a_fPreferVolatile)
3536 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3537 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3538 else
3539 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3540 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3541 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3542 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3543 Log12(("iemNativeRegAllocTmpExInt: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3544 }
3545 else
3546 {
3547 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile, fRegMask);
3548 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3549 Log12(("iemNativeRegAllocTmpExInt: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3550 }
3551 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3552}
3553
3554
3555/** See iemNativeRegAllocTmpExInt for details. */
3556DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3557{
3558 return iemNativeRegAllocTmpExInt<true>(pReNative, poff, fRegMask);
3559}
3560
3561
3562/** See iemNativeRegAllocTmpExInt for details. */
3563DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpExPreferNonVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3564{
3565 return iemNativeRegAllocTmpExInt<false>(pReNative, poff, fRegMask);
3566}
3567
3568
3569/** Internal templated variation of iemNativeRegAllocTmpEx. */
3570template<uint32_t const a_fRegMask, bool const a_fPreferVolatile>
3571DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpExInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3572{
3573 AssertCompile(!(a_fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3574 AssertCompile(!(a_fRegMask & IEMNATIVE_REG_FIXED_MASK));
3575
3576 /*
3577 * Try find a completely unused register, preferably a call-volatile one.
3578 */
3579 uint8_t idxReg;
3580 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3581 & ~pReNative->Core.bmHstRegsWithGstShadow
3582 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3583 & a_fRegMask;
3584 if (fRegs)
3585 {
3586 if RT_CONSTEXPR_IF(a_fPreferVolatile)
3587 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3588 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3589 else
3590 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3591 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3592 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3593 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3594 Log12(("iemNativeRegAllocTmpExInt: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3595 }
3596 else
3597 {
3598 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile, a_fRegMask);
3599 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3600 Log12(("iemNativeRegAllocTmpExInt: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3601 }
3602 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3603}
3604
3605
3606/**
3607 * Allocates a temporary register for loading an immediate value into.
3608 *
3609 * This will emit code to load the immediate, unless there happens to be an
3610 * unused register with the value already loaded.
3611 *
3612 * The caller must not modify the returned register; it is to be considered
3613 * read-only. Free it using iemNativeRegFreeTmpImm.
3614 *
3615 * @returns The host register number; throws VBox status code on failure, so no
3616 * need to check the return value.
3617 * @param pReNative The native recompile state.
3618 * @param poff Pointer to the variable with the code buffer position.
3619 * @param uImm The immediate value that the register must hold upon
3620 * return.
3621 * @note Prefers volatile registers.
3622 * @note Reusing immediate values has not been implemented yet.
3623 */
3624DECL_HIDDEN_THROW(uint8_t)
3625iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm)
3626{
3627 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff);
3628 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3629 return idxReg;
3630}
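/*
 * Editorial usage sketch, not part of the original source: an immediate register must be
 * treated as read-only by the caller and is released with the matching FreeTmpImm call.
 */
#if 0 /* illustration only */
    uint8_t const idxRegMask = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
    /* ... use idxRegMask purely as a source operand ... */
    iemNativeRegFreeTmpImm(pReNative, idxRegMask);
#endif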
3631
3632
3633/**
3634 * Common worker for iemNativeRegAllocTmpForGuestReg() and
3635 * iemNativeRegAllocTmpForGuestEFlags().
3636 *
3637 * See iemNativeRegAllocTmpForGuestRegInt() for details.
3638 */
3639template<IEMNATIVEGSTREGUSE const a_enmIntendedUse, uint32_t const a_fRegMask>
3640static uint8_t iemNativeRegAllocTmpForGuestRegCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3641{
3642 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3643#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3644 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3645#endif
3646
3647 /*
3648 * First check if the guest register value is already in a host register.
3649 */
3650 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3651 {
3652 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3653 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3654 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3655 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3656
3657 /* It's not supposed to be allocated... */
3658 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3659 {
3660 /*
3661 * If the register will trash the guest shadow copy, try find a
3662 * completely unused register we can use instead. If that fails,
3663 * we need to disassociate the host reg from the guest reg.
3664 */
3665 /** @todo would be nice to know if preserving the register is in any way helpful. */
3666 /* If the purpose is calculations, try to duplicate the register value as
3667 we'll be clobbering the shadow. */
3668 if ( a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3669 && ( ~pReNative->Core.bmHstRegs
3670 & ~pReNative->Core.bmHstRegsWithGstShadow
3671 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3672 {
3673 uint8_t const idxRegNew = iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff);
3674
3675 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3676
3677 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3678 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3679 g_apszIemNativeHstRegNames[idxRegNew]));
3680 idxReg = idxRegNew;
3681 }
3682 /* If the current register matches the restrictions, go ahead and allocate
3683 it for the caller. */
3684 else if (a_fRegMask & RT_BIT_32(idxReg))
3685 {
3686 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3687 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3688 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3689 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3690 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n", g_apszIemNativeHstRegNames[idxReg],
3691 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3692 else
3693 {
3694 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3695 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3696 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3697 }
3698 }
3699 /* Otherwise, allocate a register that satisfies the caller and transfer
3700 the shadowing if compatible with the intended use. (This basically
3701 means the caller wants a non-volatile register (RSP push/pop scenario).) */
3702 else
3703 {
3704 Assert(!(a_fRegMask & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
3705 uint8_t const idxRegNew = (a_fRegMask & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3706 && a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3707 ? iemNativeRegAllocTmpEx(pReNative, poff, a_fRegMask & ~RT_BIT_32(idxReg))
3708 : iemNativeRegAllocTmpExPreferNonVolatile(pReNative, poff, a_fRegMask & ~RT_BIT_32(idxReg));
3709 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3710 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3711 {
3712 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3713 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3714 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3715 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3716 }
3717 else
3718 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3719 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3720 g_apszIemNativeHstRegNames[idxRegNew]));
3721 idxReg = idxRegNew;
3722 }
3723 }
3724 else
3725 {
3726 /*
3727 * Oops. Shadowed guest register already allocated!
3728 *
3729 * Allocate a new register, copy the value and, if updating, the
3730 * guest shadow copy assignment to the new register.
3731 */
3732 AssertMsg( a_enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3733 && a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3734 ("This shouldn't happen: idxReg=%d enmGstReg=%d a_enmIntendedUse=%s\n",
3735 idxReg, enmGstReg, s_pszIntendedUse[a_enmIntendedUse]));
3736
3737 /** @todo share register for readonly access. */
3738 uint8_t const idxRegNew = a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3739 ? iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff)
3740 : iemNativeRegAllocTmpExInt<a_fRegMask, false>(pReNative, poff);
3741
3742 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3743 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3744
3745 if RT_CONSTEXPR_IF( a_enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3746 && a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3747 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3748 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3749 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[a_enmIntendedUse]));
3750 else
3751 {
3752 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3753 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3754 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3755 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[a_enmIntendedUse]));
3756 }
3757 idxReg = idxRegNew;
3758 }
3759 Assert(RT_BIT_32(idxReg) & a_fRegMask); /* See assumption in fNoVolatileRegs docs. */
3760
3761#ifdef VBOX_STRICT
3762 /* Strict builds: Check that the value is correct. */
3763 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3764#endif
3765
3766#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3767 /** @todo r=aeichner Implement for registers other than GPR as well. */
3768 if RT_CONSTEXPR_IF( a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3769 || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3770 if ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3771 && enmGstReg <= kIemNativeGstReg_GprLast)
3772 || enmGstReg == kIemNativeGstReg_MxCsr)
3773 {
3774# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3775 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3776 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3777# endif
3778 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3779 }
3780#endif
3781
3782 return idxReg;
3783 }
3784
3785 /*
3786 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3787 */
3788 uint8_t const idxRegNew = a_enmIntendedUse != kIemNativeGstRegUse_Calculation
3789 ? iemNativeRegAllocTmpExInt<a_fRegMask, false>(pReNative, poff)
3790 : iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff);
3791
3792 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3793 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3794
3795 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3796 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3797 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3798 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3799
3800#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3801 /** @todo r=aeichner Implement for registers other than GPR as well. */
3802 if RT_CONSTEXPR_IF( a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3803 || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3804 if ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3805 && enmGstReg <= kIemNativeGstReg_GprLast)
3806 || enmGstReg == kIemNativeGstReg_MxCsr)
3807 {
3808# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3809 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3810 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3811# endif
3812 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3813 }
3814#endif
3815
3816 return idxRegNew;
3817}
3818
3819
3820/**
3821 * Allocates a temporary host general purpose register for keeping a guest
3822 * register value.
3823 *
3824 * If we don't already have a host register holding the guest register value,
3825 * code will be emitted to load it. Code may also be emitted if we have to
3826 * free up a register to satisfy the request.
3827 *
3828 * @returns The host register number; throws VBox status code on failure, so no
3829 * need to check the return value.
3830 * @param pReNative The native recompile state.
3831 * @param poff Pointer to the variable with the code buffer
3832 * position. This will be updated if we need to move
3833 * a variable from register to stack in order to
3834 * satisfy the request.
3835 * @param enmGstReg The guest register that is to be updated.
3836 * @tparam a_enmIntendedUse How the caller will be using the host register.
3837 * @tparam a_fNonVolatileRegs Set if no volatile registers are allowed, clear if
3838 * any register is okay (default).
3839 * The ASSUMPTION here is that the caller has
3840 * already flushed all volatile registers,
3841 * so this is only applied if we allocate a new
3842 * register.
3843 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3844 */
3845template<IEMNATIVEGSTREGUSE const a_enmIntendedUse, bool const a_fNonVolatileRegs>
3846DECL_FORCE_INLINE_THROW(uint8_t)
3847iemNativeRegAllocTmpForGuestRegInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3848{
3849#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3850 AssertMsg( pReNative->idxCurCall == 0
3851 || enmGstReg == kIemNativeGstReg_Pc
3852 || (a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3853 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3854 : a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3855 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3856 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3857 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3858#endif
3859
3860 if RT_CONSTEXPR_IF(!a_fNonVolatileRegs)
3861 return iemNativeRegAllocTmpForGuestRegCommon<a_enmIntendedUse,
3862 IEMNATIVE_HST_GREG_MASK
3863 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, enmGstReg);
3864 else /* keep else, is required by MSC */
3865 return iemNativeRegAllocTmpForGuestRegCommon<a_enmIntendedUse,
3866 IEMNATIVE_HST_GREG_MASK
3867 & ~IEMNATIVE_REG_FIXED_MASK
3868 & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK>(pReNative, poff, enmGstReg);
3869}
3870
3871/* Variants including volatile registers: */
3872
3873DECL_HIDDEN_THROW(uint8_t)
3874iemNativeRegAllocTmpForGuestRegReadOnly(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3875{
3876 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ReadOnly, false>(pReNative, poff, enmGstReg);
3877}
3878
3879DECL_HIDDEN_THROW(uint8_t)
3880iemNativeRegAllocTmpForGuestRegUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3881{
3882 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForUpdate, false>(pReNative, poff, enmGstReg);
3883}
3884
3885DECL_HIDDEN_THROW(uint8_t)
3886iemNativeRegAllocTmpForGuestRegFullWrite(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3887{
3888 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForFullWrite, false>(pReNative, poff, enmGstReg);
3889}
3890
3891DECL_HIDDEN_THROW(uint8_t)
3892iemNativeRegAllocTmpForGuestRegCalculation(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3893{
3894 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_Calculation, false>(pReNative, poff, enmGstReg);
3895}
3896
3897/* Variants excluding any volatile registers: */
3898
3899DECL_HIDDEN_THROW(uint8_t)
3900iemNativeRegAllocTmpForGuestRegReadOnlyNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3901{
3902 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ReadOnly, true>(pReNative, poff, enmGstReg);
3903}
3904
3905DECL_HIDDEN_THROW(uint8_t)
3906iemNativeRegAllocTmpForGuestRegUpdateNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3907{
3908 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForUpdate, true>(pReNative, poff, enmGstReg);
3909}
3910
3911DECL_HIDDEN_THROW(uint8_t)
3912iemNativeRegAllocTmpForGuestRegFullWriteNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3913{
3914 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForFullWrite, true>(pReNative, poff, enmGstReg);
3915}
3916
3917DECL_HIDDEN_THROW(uint8_t)
3918iemNativeRegAllocTmpForGuestRegCalculationNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3919{
3920 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_Calculation, true>(pReNative, poff, enmGstReg);
3921}
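/*
 * Editorial usage sketch, not part of the original source: fetching guest RAX for a
 * read-modify-write sequence. The guest GPR enum entries are assumed to follow x86
 * register order from kIemNativeGstReg_GprFirst; the actual modification emit is elided.
 * With delayed writeback enabled the shadow copy is marked dirty by the allocator, so no
 * explicit store back to CPUMCTX is emitted here.
 */
#if 0 /* illustration only */
    uint8_t const idxGstRaxReg = iemNativeRegAllocTmpForGuestRegUpdate(pReNative, &off,
                                                                       (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX));
    /* ... emit code modifying idxGstRaxReg in place ... */
    iemNativeRegFreeTmp(pReNative, idxGstRaxReg);
#endif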
3922
3923
3924
3925#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
3926/**
3927 * Specialized version of iemNativeRegAllocTmpForGuestReg for EFLAGS.
3928 *
3929 * This takes additional arguments for covering liveness assertions in strict
3930 * builds, it's otherwise the same as iemNativeRegAllocTmpForGuestReg() with
3931 * kIemNativeGstReg_EFlags as argument.
3932 */
3933template<IEMNATIVEGSTREGUSE const a_enmIntendedUse>
3934DECL_FORCE_INLINE_THROW(uint8_t)
3935iemNativeRegAllocTmpForGuestEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t fRead,
3936 uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3937{
3938 if (pReNative->idxCurCall != 0 && (fRead || fWrite /*|| fPotentialCall*/))
3939 {
3940 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3941 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3942 Assert(!(fPotentialCall & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3943 uint64_t const fAll = fRead | fWrite /*| fPotentialCall*/;
3944 uint32_t fState;
3945# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
3946 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
3947 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
3948 || ( fRead & RT_BIT_64(a_enmGstEfl) \
3949 ? fWrite & RT_BIT_64(a_enmGstEfl) \
3950 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
3951 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
3952 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
3953 ) \
3954 , ("%s - %u\n", #a_enmGstEfl, fState))
3955 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
3956 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
3957 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
3958 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
3959 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
3960 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
3961 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
3962# undef MY_ASSERT_ONE_EFL
3963 }
3964 RT_NOREF(fPotentialCall);
3965
3966 AssertCompile(a_enmIntendedUse == kIemNativeGstRegUse_ReadOnly || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate);
3967 if RT_CONSTEXPR_IF(a_enmIntendedUse == kIemNativeGstRegUse_ReadOnly)
3968 return iemNativeRegAllocTmpForGuestRegCommon<kIemNativeGstRegUse_ReadOnly,
3969 IEMNATIVE_HST_GREG_MASK
3970 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, kIemNativeGstReg_EFlags);
3971 else /* keep else, is required by MSC */
3972 return iemNativeRegAllocTmpForGuestRegCommon<kIemNativeGstRegUse_ForUpdate,
3973 IEMNATIVE_HST_GREG_MASK
3974 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, kIemNativeGstReg_EFlags);
3975}
3976
3977
3978DECL_HIDDEN_THROW(uint8_t)
3979iemNativeRegAllocTmpForGuestEFlagsReadOnly(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
3980 uint64_t fRead, uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3981{
3982 return iemNativeRegAllocTmpForGuestEFlags<kIemNativeGstRegUse_ReadOnly>(pReNative, poff, fRead, fWrite, fPotentialCall);
3983}
3984
3985DECL_HIDDEN_THROW(uint8_t)
3986iemNativeRegAllocTmpForGuestEFlagsForUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t fRead,
3987 uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3988{
3989 return iemNativeRegAllocTmpForGuestEFlags<kIemNativeGstRegUse_ForUpdate>(pReNative, poff, fRead, fWrite, fPotentialCall);
3990}
3991
3992#endif
3993
3994
3995
3996/**
3997 * Common worker for iemNativeRegAllocTmpForGuestRegIfAlreadyPresent and
3998 * iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent.
3999 *
4000 * See iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() for details.
4001 */
4002DECL_FORCE_INLINE(uint8_t)
4003iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4004{
4005 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4006
4007 /*
4008 * First check if the guest register value is already in a host register.
4009 */
4010 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4011 {
4012 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4013 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4014 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4015 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4016
4017 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4018 {
4019 /*
4020 * We only do readonly use here, so easy compared to the other
4021 * variant of this code.
4022 */
4023 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4024 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4025 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4026 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4027 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4028
4029#ifdef VBOX_STRICT
4030 /* Strict builds: Check that the value is correct. */
4031 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4032#else
4033 RT_NOREF(poff);
4034#endif
4035 return idxReg;
4036 }
4037 }
4038
4039 return UINT8_MAX;
4040}
4041
4042
4043/**
4044 * Allocates a temporary host general purpose register that already holds the
4045 * given guest register value.
4046 *
4047 * The use case for this function is places where the shadowing state cannot be
4048 * modified due to branching and such. This will fail if we don't have a
4049 * current shadow copy handy or if it's incompatible. The only code that will
4050 * be emitted here is value checking code in strict builds.
4051 *
4052 * The intended use can only be readonly!
4053 *
4054 * @returns The host register number, UINT8_MAX if not present.
4055 * @param pReNative The native recompile state.
4056 * @param poff Pointer to the instruction buffer offset.
4057 * Will be updated in strict builds if a register is
4058 * found.
4059 * @param enmGstReg The guest register that is to be read.
4060 * @note In strict builds, this may throw instruction buffer growth failures.
4061 * Non-strict builds will not throw anything.
4062 * @sa iemNativeRegAllocTmpForGuestReg
4063 */
4064DECL_HIDDEN_THROW(uint8_t)
4065iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4066{
4067#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4068 AssertMsg( pReNative->idxCurCall == 0
4069 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4070 || enmGstReg == kIemNativeGstReg_Pc
4071 , ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4072#endif
4073 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, enmGstReg);
4074}
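/*
 * Editorial usage sketch, not part of the original source: this variant returns UINT8_MAX
 * instead of emitting any load code, so the caller needs a fallback path.
 */
#if 0 /* illustration only */
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* ... read-only use of the shadowed RIP value ... */
        iemNativeRegFreeTmp(pReNative, idxPcReg);
    }
    else
    {
        /* ... fall back to a path that fetches or recomputes RIP ... */
    }
#endif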
4075
4076
4077#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
4078/**
4079 * Specialized version of iemNativeRegAllocTmpForGuestRegIfAlreadyPresent for
4080 * EFLAGS.
4081 *
4082 * This takes additional arguments for covering liveness assertions in strict
4083 * builds, it's otherwise the same as
4084 * iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() with
4085 * kIemNativeGstReg_EFlags as argument.
4086 *
4087 * @note The @a fWrite parameter is necessary to complete the liveness picture,
4088 * as iemNativeEmitFetchEFlags() may fetch flags in prep for a later
4089 * commit. If the operation clobbers all the flags, @a fRead will be
4090 * zero, so better verify the whole picture while we're here.
4091 */
4092DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
4093 uint64_t fRead, uint64_t fWrite /*=0*/)
4094{
4095 if (pReNative->idxCurCall != 0)
4096 {
4097 Assert(fRead | fWrite);
4098 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
4099 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
4100 uint64_t const fAll = fRead | fWrite;
4101 uint32_t fState;
4102# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
4103 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
4104 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
4105 || ( fRead & RT_BIT_64(a_enmGstEfl) \
4106 ? fWrite & RT_BIT_64(a_enmGstEfl) \
4107 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
4108 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
4109 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
4110 ) \
4111 , ("%s - %u\n", #a_enmGstEfl, fState))
4112 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
4113 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
4114 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
4115 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
4116 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
4117 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
4118 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
4119# undef MY_ASSERT_ONE_EFL
4120 }
4121 RT_NOREF(fRead);
4122 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, kIemNativeGstReg_EFlags);
4123}
4124#endif
4125
4126
4127/**
4128 * Allocates argument registers for a function call.
4129 *
4130 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4131 * need to check the return value.
4132 * @param pReNative The native recompile state.
4133 * @param off The current code buffer offset.
4134 * @param cArgs The number of arguments the function call takes.
4135 */
4136DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4137{
4138 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4139 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4140 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4141 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4142
4143 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4144 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4145 else if (cArgs == 0)
4146 return off;
4147
4148 /*
4149 * Do we get lucky and all registers are free and not shadowing anything?
4150 */
4151 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4152 for (uint32_t i = 0; i < cArgs; i++)
4153 {
4154 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4155 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4156 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4157 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4158 }
4159 /*
4160 * Okay, not lucky so we have to free up the registers.
4161 */
4162 else
4163 for (uint32_t i = 0; i < cArgs; i++)
4164 {
4165 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4166 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4167 {
4168 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4169 {
4170 case kIemNativeWhat_Var:
4171 {
4172 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4173 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4174 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4175 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4176 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4177 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4178
4179 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4180 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4181 else
4182 {
4183 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4184 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4185 }
4186 break;
4187 }
4188
4189 case kIemNativeWhat_Tmp:
4190 case kIemNativeWhat_Arg:
4191 case kIemNativeWhat_rc:
4192 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4193 default:
4194 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4195 }
4196
4197 }
4198 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4199 {
4200 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4201 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4202 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4203#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4204 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4205#endif
4206 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4207 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4208 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4209 }
4210 else
4211 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4212 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4213 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4214 }
4215 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4216 return off;
4217}
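/*
 * Editorial usage sketch, not part of the original source: freeing up the calling
 * convention argument registers before loading them for a helper call, following the
 * documented return contract (new code buffer offset). The pVCpu-into-ARG0 load and the
 * elided helper call are placeholders.
 */
#if 0 /* illustration only */
    off = iemNativeRegAllocArgs(pReNative, off, 2 /*cArgs*/);
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    /* ... load IEMNATIVE_CALL_ARG1_GREG and emit the call ... */
#endif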
4218
4219
4220DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4221
4222
4223#if 0
4224/**
4225 * Frees a register assignment of any type.
4226 *
4227 * @param pReNative The native recompile state.
4228 * @param idxHstReg The register to free.
4229 *
4230 * @note Does not update variables.
4231 */
4232DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4233{
4234 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4235 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4236 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4237 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4238 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4239 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4240 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4241 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4242 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4243 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4244 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4245 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4246 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4247 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4248
4249 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4250 /* no flushing, right:
4251 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4252 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4253 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4254 */
4255}
4256#endif
4257
4258
4259/**
4260 * Frees a temporary register.
4261 *
4262 * Any shadow copies of guest registers assigned to the host register will not
4263 * be flushed by this operation.
4264 */
4265DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4266{
4267 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4268 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4269 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4270 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4271 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4272}
4273
4274
4275/**
4276 * Frees a temporary immediate register.
4277 *
4278 * It is assumed that the caller has not modified the register, so it still holds
4279 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4280 */
4281DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4282{
4283 iemNativeRegFreeTmp(pReNative, idxHstReg);
4284}
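
/*
 * Usage sketch (illustrative only), assuming the allocation counterpart
 * iemNativeRegAllocTmpImm() takes the (pReNative, &off, uImm) form:
 *
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
 *      // ... emit code that only reads idxRegImm ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 */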
4285
4286
4287/**
4288 * Frees a register assigned to a variable.
4289 *
4290 * The register will be disassociated from the variable.
4291 */
4292DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4293{
4294 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4295 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4296 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4297 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4298 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4299 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4300
4301 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4302 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4303 if (!fFlushShadows)
4304 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4305 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4306 else
4307 {
4308 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4309 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4310#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4311 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
4312#endif
4313 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4314 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4315 uint64_t fGstRegShadows = fGstRegShadowsOld;
4316 while (fGstRegShadows)
4317 {
4318 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4319 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4320
4321 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4322 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4323 }
4324 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4325 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4326 }
4327}
4328
4329
4330#if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
4331/** Host CPU SIMD register names. */
4332DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4333{
4334# ifdef RT_ARCH_AMD64
4335 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4336# elif RT_ARCH_ARM64
4337 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4338 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4339# else
4340# error "port me"
4341# endif
4342};
4343#endif
4344
4345
4346/**
4347 * Frees a SIMD register assigned to a variable.
4348 *
4349 * The register will be disassociated from the variable.
4350 */
4351DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4352{
4353 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4354 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4355 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4356 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4357 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4358 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4359
4360 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4361 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4362 if (!fFlushShadows)
4363 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4364 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4365 else
4366 {
4367 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4368 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4369 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4370 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4371 uint64_t fGstRegShadows = fGstRegShadowsOld;
4372 while (fGstRegShadows)
4373 {
4374 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4375 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4376
4377 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4378 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4379 }
4380 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4381 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4382 }
4383}
4384
4385
4386/**
4387 * Reassigns a variable to a different SIMD register specified by the caller.
4388 *
4389 * @returns The new code buffer position.
4390 * @param pReNative The native recompile state.
4391 * @param off The current code buffer position.
4392 * @param idxVar The variable index.
4393 * @param idxRegOld The old host register number.
4394 * @param idxRegNew The new host register number.
4395 * @param pszCaller The caller for logging.
4396 */
4397static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4398 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4399{
4400 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4401 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4402 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4403 RT_NOREF(pszCaller);
4404
4405 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4406 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4407 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4408
4409 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4410 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4411 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4412
4413 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4414 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4416
4417 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4418 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4419 else
4420 {
4421 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4422 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4423 }
4424
4425 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4426 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4427 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
4428 if (fGstRegShadows)
4429 {
4430 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4431 | RT_BIT_32(idxRegNew);
4432 while (fGstRegShadows)
4433 {
4434 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4435 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4436
4437 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4438 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4439 }
4440 }
4441
4442 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4443 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4444 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4445 return off;
4446}
4447
4448
4449/**
4450 * Moves a variable to a different register or spills it onto the stack.
4451 *
4452 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4453 * kinds can easily be recreated if needed later.
4454 *
4455 * @returns The new code buffer position.
4456 * @param pReNative The native recompile state.
4457 * @param off The current code buffer position.
4458 * @param idxVar The variable index.
4459 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4460 * call-volatile registers.
4461 */
4462DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4463 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4464{
4465 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4466 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4467 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4468 Assert(!pVar->fRegAcquired);
4469 Assert(pVar->fSimdReg);
4470
4471 uint8_t const idxRegOld = pVar->idxReg;
4472 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4473 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4474 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4475 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4476 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4477 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
4478 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4479 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4480 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4481 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4482
4483 /** @todo Add statistics on this.*/
4484 /** @todo Implement basic variable liveness analysis (python) so variables
4485 * can be freed immediately once no longer used. Otherwise we risk trashing
4486 * registers and stack slots for dead variables.
4487 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4488
4489 /*
4490 * First try move it to a different register, as that's cheaper.
4491 */
4492 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4493 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4494 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4495 if (fRegs)
4496 {
4497 /* Avoid using shadow registers, if possible. */
4498 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4499 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4500 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4501 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4502 }
4503
4504 /*
4505 * Otherwise we must spill the register onto the stack.
4506 */
4507 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4508 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4509 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4510
4511 if (pVar->cbVar == sizeof(RTUINT128U))
4512 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4513 else
4514 {
4515 Assert(pVar->cbVar == sizeof(RTUINT256U));
4516 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4517 }
4518
4519 pVar->idxReg = UINT8_MAX;
4520 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4521 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4522 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4523 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4524 return off;
4525}
4526
4527
4528/**
4529 * Called right before emitting a call instruction to move anything important
4530 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
4531 * optionally freeing argument variables.
4532 *
4533 * @returns New code buffer offset, UINT32_MAX on failure.
4534 * @param pReNative The native recompile state.
4535 * @param off The code buffer offset.
4536 * @param cArgs The number of arguments the function call takes.
4537 * It is presumed that the host register part of these has
4538 * been allocated as such already and won't need moving,
4539 * just freeing.
4540 * @param fKeepVars Mask of variables that should keep their register
4541 * assignments. Caller must take care to handle these.
4542 */
4543DECL_HIDDEN_THROW(uint32_t)
4544iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4545{
4546 Assert(!cArgs); RT_NOREF(cArgs);
4547
4548 /* fKeepVars will reduce this mask. */
4549 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4550
4551 /*
4552 * Move anything important out of volatile registers.
4553 */
4554 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4555#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4556 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4557#endif
4558 ;
4559
4560 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4561 if (!fSimdRegsToMove)
4562 { /* likely */ }
4563 else
4564 {
4565 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4566 while (fSimdRegsToMove != 0)
4567 {
4568 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4569 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4570
4571 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4572 {
4573 case kIemNativeWhat_Var:
4574 {
4575 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4576 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4577 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4578 Assert(pVar->idxReg == idxSimdReg);
4579 Assert(pVar->fSimdReg);
4580 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4581 {
4582 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4583 idxVar, pVar->enmKind, pVar->idxReg));
4584 if (pVar->enmKind != kIemNativeVarKind_Stack)
4585 pVar->idxReg = UINT8_MAX;
4586 else
4587 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4588 }
4589 else
4590 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4591 continue;
4592 }
4593
4594 case kIemNativeWhat_Arg:
4595 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4596 continue;
4597
4598 case kIemNativeWhat_rc:
4599 case kIemNativeWhat_Tmp:
4600 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4601 continue;
4602
4603 case kIemNativeWhat_FixedReserved:
4604#ifdef RT_ARCH_ARM64
4605 continue; /* On ARM the upper half of the virtual 256-bit register. */
4606#endif
4607
4608 case kIemNativeWhat_FixedTmp:
4609 case kIemNativeWhat_pVCpuFixed:
4610 case kIemNativeWhat_pCtxFixed:
4611 case kIemNativeWhat_PcShadow:
4612 case kIemNativeWhat_Invalid:
4613 case kIemNativeWhat_End:
4614 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4615 }
4616 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4617 }
4618 }
4619
4620 /*
4621 * Do the actual freeing.
4622 */
4623 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4624 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4625 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4626 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4627
4628 /* If there are guest register shadows in any call-volatile register, we
4629 have to clear the corresponding guest register masks for each register. */
4630 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4631 if (fHstSimdRegsWithGstShadow)
4632 {
4633 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4634 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4635 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4636 do
4637 {
4638 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4639 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4640
4641 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4642
4643#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4644 /*
4645 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4646 * to call volatile registers).
4647 */
4648 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4649 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4650 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4651#endif
4652 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4653 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4654
4655 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4656 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4657 } while (fHstSimdRegsWithGstShadow != 0);
4658 }
4659
4660 return off;
4661}
4662
4663
4664/**
4665 * Called right before emitting a call instruction to move anything important
4666 * out of call-volatile registers, free and flush the call-volatile registers,
4667 * optionally freeing argument variables.
4668 *
4669 * @returns New code buffer offset, UINT32_MAX on failure.
4670 * @param pReNative The native recompile state.
4671 * @param off The code buffer offset.
4672 * @param cArgs The number of arguments the function call takes.
4673 * It is presumed that the host register part of these has
4674 * been allocated as such already and won't need moving,
4675 * just freeing.
4676 * @param fKeepVars Mask of variables that should keep their register
4677 * assignments. Caller must take care to handle these.
4678 */
4679DECL_HIDDEN_THROW(uint32_t)
4680iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4681{
4682 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4683
4684 /* fKeepVars will reduce this mask. */
4685 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK;
4686
4687#ifdef RT_ARCH_ARM64
4688AssertCompile(IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK == UINT32_C(0x37fff));
4689#endif
4690
4691 /*
4692 * Move anything important out of volatile registers.
4693 */
4694 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4695 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4696 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK
4697#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4698 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4699#endif
4700 & ~g_afIemNativeCallRegs[cArgs];
4701
4702 fRegsToMove &= pReNative->Core.bmHstRegs;
4703 if (!fRegsToMove)
4704 { /* likely */ }
4705 else
4706 {
4707 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4708 while (fRegsToMove != 0)
4709 {
4710 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4711 fRegsToMove &= ~RT_BIT_32(idxReg);
4712
4713 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4714 {
4715 case kIemNativeWhat_Var:
4716 {
4717 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4718 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4719 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4720 Assert(pVar->idxReg == idxReg);
4721 Assert(!pVar->fSimdReg);
4722 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4723 {
4724 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4725 idxVar, pVar->enmKind, pVar->idxReg));
4726 if (pVar->enmKind != kIemNativeVarKind_Stack)
4727 pVar->idxReg = UINT8_MAX;
4728 else
4729 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4730 }
4731 else
4732 fRegsToFree &= ~RT_BIT_32(idxReg);
4733 continue;
4734 }
4735
4736 case kIemNativeWhat_Arg:
4737 AssertMsgFailed(("What?!?: %u\n", idxReg));
4738 continue;
4739
4740 case kIemNativeWhat_rc:
4741 case kIemNativeWhat_Tmp:
4742 AssertMsgFailed(("Missing free: %u\n", idxReg));
4743 continue;
4744
4745 case kIemNativeWhat_FixedTmp:
4746 case kIemNativeWhat_pVCpuFixed:
4747 case kIemNativeWhat_pCtxFixed:
4748 case kIemNativeWhat_PcShadow:
4749 case kIemNativeWhat_FixedReserved:
4750 case kIemNativeWhat_Invalid:
4751 case kIemNativeWhat_End:
4752 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4753 }
4754 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4755 }
4756 }
4757
4758 /*
4759 * Do the actual freeing.
4760 */
4761 if (pReNative->Core.bmHstRegs & fRegsToFree)
4762 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4763 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4764 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4765
4766 /* If there are guest register shadows in any call-volatile register, we
4767 have to clear the corresponding guest register masks for each register. */
4768 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4769 if (fHstRegsWithGstShadow)
4770 {
4771 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4772 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
4773 fHstRegsWithGstShadow));
4774 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4775 do
4776 {
4777 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4778 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4779
4780 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4781
4782#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4783 /*
4784 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4785 * to call volatile registers).
4786 */
4787 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4788 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4789 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4790#endif
4791
4792 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4793 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4794 } while (fHstRegsWithGstShadow != 0);
4795 }
4796
4797 /*
4798 * Now for the SIMD registers, no argument support for now.
4799 */
4800 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4801
4802 return off;
4803}
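
/*
 * Usage sketch (simplified; the real sequence lives in iemEmitCallCommon()):
 * flush and free the call-volatile registers first, then load the arguments
 * and emit the call. cArgs stands in for the real argument count.
 *
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cArgs);
 *      // ... load the argument registers and emit the actual call ...
 *      // ... afterwards drop or restore guest shadows as appropriate, see
 *      //     iemNativeRegFlushGuestShadows() and
 *      //     iemNativeRegRestoreGuestShadowsInVolatileRegs() ...
 */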
4804
4805
4806/**
4807 * Flushes a set of guest register shadow copies.
4808 *
4809 * This is usually done after calling a threaded function or a C-implementation
4810 * of an instruction.
4811 *
4812 * @param pReNative The native recompile state.
4813 * @param fGstRegs Set of guest registers to flush.
4814 */
4815DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4816{
4817 /*
4818 * Reduce the mask by what's currently shadowed
4819 */
4820 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4821 fGstRegs &= bmGstRegShadowsOld;
4822 if (fGstRegs)
4823 {
4824 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4825 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4826 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4827 if (bmGstRegShadowsNew)
4828 {
4829 /*
4830 * Partial.
4831 */
4832 do
4833 {
4834 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4835 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4836 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4837 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4838 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4839#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4840 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4841#endif
4842
4843 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4844 fGstRegs &= ~fInThisHstReg;
4845 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4846 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4847 if (!fGstRegShadowsNew)
4848 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4849 } while (fGstRegs != 0);
4850 }
4851 else
4852 {
4853 /*
4854 * Clear all.
4855 */
4856 do
4857 {
4858 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4859 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4860 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4861 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4862 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4863#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4864 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4865#endif
4866
4867 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4868 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4869 } while (fGstRegs != 0);
4870 pReNative->Core.bmHstRegsWithGstShadow = 0;
4871 }
4872 }
4873}
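
/*
 * Usage sketch: after a call that may have modified guest state, the
 * conservative approach is to drop every shadow. The function reduces the
 * mask to what is actually shadowed, so UINT64_MAX is safe; a narrower
 * RT_BIT_64() mask of the affected IEMNATIVEGSTREG values works the same way.
 *
 *      iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
 */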
4874
4875
4876/**
4877 * Flushes guest register shadow copies held by a set of host registers.
4878 *
4879 * This is used with the TLB lookup code for ensuring that we don't carry on
4880 * with any guest shadows in volatile registers, as these will get corrupted by
4881 * a TLB miss.
4882 *
4883 * @param pReNative The native recompile state.
4884 * @param fHstRegs Set of host registers to flush guest shadows for.
4885 */
4886DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4887{
4888 /*
4889 * Reduce the mask by what's currently shadowed.
4890 */
4891 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4892 fHstRegs &= bmHstRegsWithGstShadowOld;
4893 if (fHstRegs)
4894 {
4895 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4896 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4897 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4898 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4899 if (bmHstRegsWithGstShadowNew)
4900 {
4901 /*
4902 * Partial (likely).
4903 */
4904 uint64_t fGstShadows = 0;
4905 do
4906 {
4907 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4908 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4909 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4910 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4911#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4912 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4913#endif
4914
4915 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4916 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4917 fHstRegs &= ~RT_BIT_32(idxHstReg);
4918 } while (fHstRegs != 0);
4919 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4920 }
4921 else
4922 {
4923 /*
4924 * Clear all.
4925 */
4926 do
4927 {
4928 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4929 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4930 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4931 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4932#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4933 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4934#endif
4935
4936 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4937 fHstRegs &= ~RT_BIT_32(idxHstReg);
4938 } while (fHstRegs != 0);
4939 pReNative->Core.bmGstRegShadows = 0;
4940 }
4941 }
4942}
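
/*
 * Usage sketch for the TLB lookup situation described above: drop the
 * shadows held in call-volatile host registers before branching to code
 * that may clobber them.
 *
 *      iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 */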
4943
4944
4945/**
4946 * Restores guest shadow copies in volatile registers.
4947 *
4948 * This is used after calling a helper function (think TLB miss) to restore the
4949 * register state of volatile registers.
4950 *
4951 * @param pReNative The native recompile state.
4952 * @param off The code buffer offset.
4953 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4954 * be active (allocated) w/o asserting. Hack.
4955 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4956 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4957 */
4958DECL_HIDDEN_THROW(uint32_t)
4959iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4960{
4961 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4962 if (fHstRegs)
4963 {
4964 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4965 do
4966 {
4967 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4968
4969 /* It's not fatal if a register is active holding a variable that is
4970 shadowing a guest register, ASSUMING all pending guest register
4971 writes were flushed prior to the helper call. However, we'll be
4972 emitting duplicate restores, so it wastes code space. */
4973 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4974 RT_NOREF(fHstRegsActiveShadows);
4975
4976 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4977#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4978 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4979#endif
4980 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4981 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4982 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4983
4984 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4985 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4986
4987 fHstRegs &= ~RT_BIT_32(idxHstReg);
4988 } while (fHstRegs != 0);
4989 }
4990 return off;
4991}
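
/*
 * Usage sketch: after emitting a helper call on a path where the volatile
 * registers still have guest shadows recorded (think TLB miss), re-load them
 * so the recorded shadowing is valid again. Passing 0 means no active
 * (allocated) shadow registers are tolerated.
 *
 *      // ... emit the helper call (volatile register contents are now stale) ...
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
 */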
4992
4993
4994
4995
4996/*********************************************************************************************************************************
4997* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4998*********************************************************************************************************************************/
4999
5000/**
5001 * Info about shadowed guest SIMD register values.
5002 * @see IEMNATIVEGSTSIMDREG
5003 */
5004static struct
5005{
5006 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5007 uint32_t offXmm;
5008 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5009 uint32_t offYmm;
5010 /** Name (for logging). */
5011 const char *pszName;
5012} const g_aGstSimdShadowInfo[] =
5013{
5014#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5015 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5016 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5017 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5018 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5019 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5020 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5021 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5022 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5023 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5024 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5025 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5026 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5027 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5028 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5029 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5030 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5031 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5032#undef CPUMCTX_OFF_AND_SIZE
5033};
5034AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5035
5036
5037/**
5038 * Frees a temporary SIMD register.
5039 *
5040 * Any shadow copies of guest registers assigned to the host register will not
5041 * be flushed by this operation.
5042 */
5043DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5044{
5045 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5046 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5047 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5048 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5049 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5050}
5051
5052
5053/**
5054 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
5055 *
5056 * @returns New code buffer offset.
5057 * @param pReNative The native recompile state.
5058 * @param off Current code buffer position.
5059 * @param enmGstSimdReg The guest SIMD register to flush.
5060 */
5061DECL_HIDDEN_THROW(uint32_t)
5062iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5063{
5064 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5065
5066 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5067 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5068 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5069 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5070
5071 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5072 {
5073 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5074 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5075 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5076 }
5077
5078 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5079 {
5080 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5081 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5082 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5083 }
5084
5085 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5086 return off;
5087}
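
/*
 * Usage sketch: write back a guest SIMD register (some IEMNATIVEGSTSIMDREG
 * enmGstSimdReg) before handing its host register to other code, guarded by
 * the same dirty checks the function itself uses:
 *
 *      if (   IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
 *          || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
 *          off = iemNativeSimdRegFlushPendingWrite(pReNative, off, enmGstSimdReg);
 */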
5088
5089
5090/**
5091 * Flush the given set of guest SIMD registers if marked as dirty.
5092 *
5093 * @returns New code buffer offset.
5094 * @param pReNative The native recompile state.
5095 * @param off Current code buffer position.
5096 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
5097 */
5098DECL_HIDDEN_THROW(uint32_t)
5099iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
5100{
5101 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5102 & fFlushGstSimdReg;
5103 if (bmGstSimdRegShadowDirty)
5104 {
5105# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5106 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5107 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5108# endif
5109
5110 do
5111 {
5112 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5113 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5114 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5115 } while (bmGstSimdRegShadowDirty);
5116 }
5117
5118 return off;
5119}
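
/*
 * Usage sketch: write back all dirty guest SIMD registers, e.g. right before
 * emitting code that calls out of the translation block:
 *
 *      off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, UINT64_MAX);
 */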
5120
5121
5122#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5123/**
5124 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
5125 *
5126 * @returns New code buffer offset.
5127 * @param pReNative The native recompile state.
5128 * @param off Current code buffer position.
5129 * @param idxHstSimdReg The host SIMD register.
5130 *
5131 * @note This doesn't do any unshadowing of guest registers from the host register.
5132 */
5133DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
5134{
5135 /* We need to flush any pending guest register writes this host register shadows. */
5136 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5137 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5138 if (bmGstSimdRegShadowDirty)
5139 {
5140# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5141 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5142 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5143# endif
5144
5145 do
5146 {
5147 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5148 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5149 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5150 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5151 } while (bmGstSimdRegShadowDirty);
5152 }
5153
5154 return off;
5155}
5156#endif
5157
5158
5159/**
5160 * Locate a register, possibly freeing one up.
5161 *
5162 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5163 * failed.
5164 *
5165 * @returns Host register number on success. Returns UINT8_MAX if no registers
5166 * found, the caller is supposed to deal with this and raise an
5167 * allocation type specific status code (if desired).
5168 *
5169 * @throws VBox status code if we run into trouble spilling a variable or
5170 * recording debug info. Does NOT throw anything if we're out of
5171 * registers, though.
5172 */
5173static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5174 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5175{
5176 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5177 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5178 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5179
5180 /*
5181 * Try a freed register that's shadowing a guest register.
5182 */
5183 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5184 if (fRegs)
5185 {
5186 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5187
5188#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5189 /*
5190 * When we have liveness information, we use it to kick out all shadowed
5191 * guest registers that will not be needed any more in this TB. If we're
5192 * lucky, this may prevent us from ending up here again.
5193 *
5194 * Note! We must consider the previous entry here so we don't free
5195 * anything that the current threaded function requires (current
5196 * entry is produced by the next threaded function).
5197 */
5198 uint32_t const idxCurCall = pReNative->idxCurCall;
5199 if (idxCurCall > 0)
5200 {
5201 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5202 uint64_t const fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
5203
5204 /* If it matches any shadowed registers. */
5205 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5206 {
5207 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5208 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5209 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5210
5211 /* See if we've got any unshadowed registers we can return now. */
5212 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5213 if (fUnshadowedRegs)
5214 {
5215 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
5216 return (fPreferVolatile
5217 ? ASMBitFirstSetU32(fUnshadowedRegs)
5218 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5219 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5220 - 1;
5221 }
5222 }
5223 }
5224#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5225
5226 unsigned const idxReg = (fPreferVolatile
5227 ? ASMBitFirstSetU32(fRegs)
5228 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5229 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5230 - 1;
5231
5232 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5233 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5234 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5235 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5236
5237 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5238 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
5239
5240 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5241 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5242 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5243 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5244 return idxReg;
5245 }
5246
5247 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5248
5249 /*
5250 * Try free up a variable that's in a register.
5251 *
5252 * We do two rounds here, first evacuating variables we don't need to be
5253 * saved on the stack, then in the second round move things to the stack.
5254 */
5255 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5256 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5257 {
5258 uint32_t fVars = pReNative->Core.bmVars;
5259 while (fVars)
5260 {
5261 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5262 if (pReNative->Core.aVars[idxVar].fSimdReg) /* (this is the SIMD allocator) */
5263 {
5264 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5265 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5266 && (RT_BIT_32(idxReg) & fRegMask)
5267 && ( iLoop == 0
5268 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5269 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5270 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5271 {
5272 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5273 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5274 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5275 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5276 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5277 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5278
5279 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5280 {
5281 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5282 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5283 }
5284
5285 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5286 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5287
5288 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5289 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5290 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5291 return idxReg;
5292 }
5293 }
5294 fVars &= ~RT_BIT_32(idxVar);
5295 }
5296 }
5297
5298 AssertFailed();
5299 return UINT8_MAX;
5300}
5301
5302
5303/**
5304 * Flushes a set of guest register shadow copies.
5305 *
5306 * This is usually done after calling a threaded function or a C-implementation
5307 * of an instruction.
5308 *
5309 * @param pReNative The native recompile state.
5310 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5311 */
5312DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5313{
5314 /*
5315 * Reduce the mask by what's currently shadowed
5316 */
5317 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5318 fGstSimdRegs &= bmGstSimdRegShadows;
5319 if (fGstSimdRegs)
5320 {
5321 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5322 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5323 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5324 if (bmGstSimdRegShadowsNew)
5325 {
5326 /*
5327 * Partial.
5328 */
5329 do
5330 {
5331 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5332 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5333 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5334 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5335 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5336 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5337
5338 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5339 fGstSimdRegs &= ~fInThisHstReg;
5340 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5341 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5342 if (!fGstRegShadowsNew)
5343 {
5344 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5345 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5346 }
5347 } while (fGstSimdRegs != 0);
5348 }
5349 else
5350 {
5351 /*
5352 * Clear all.
5353 */
5354 do
5355 {
5356 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5357 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5358 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5359 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5360 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5361 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5362
5363 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5364 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5365 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5366 } while (fGstSimdRegs != 0);
5367 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5368 }
5369 }
5370}
5371
5372
5373/**
5374 * Allocates a temporary host SIMD register.
5375 *
5376 * This may emit code to save register content onto the stack in order to free
5377 * up a register.
5378 *
5379 * @returns The host register number; throws VBox status code on failure,
5380 * so no need to check the return value.
5381 * @param pReNative The native recompile state.
5382 * @param poff Pointer to the variable with the code buffer position.
5383 * This will be updated if we need to move a variable from
5384 * register to stack in order to satisfy the request.
5385 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5386 * registers (@c true, default) or the other way around
5387 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5388 */
5389DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5390{
5391 /*
5392 * Try find a completely unused register, preferably a call-volatile one.
5393 */
5394 uint8_t idxSimdReg;
5395 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5396 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5397 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5398 if (fRegs)
5399 {
5400 if (fPreferVolatile)
5401 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5402 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5403 else
5404 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5405 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5406 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5407 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5408
5409 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5410 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5411 }
5412 else
5413 {
5414 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5415 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5416 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5417 }
5418
5419 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5420 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5421}
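
/*
 * Usage sketch (illustrative only): a scratch SIMD register bracketed by
 * allocation and the matching free:
 *
 *      uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      // ... emit SIMD code using idxSimdTmp ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
 */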
5422
5423
5424/**
5425 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5426 * registers.
5427 *
5428 * @returns The host register number; throws VBox status code on failure,
5429 * so no need to check the return value.
5430 * @param pReNative The native recompile state.
5431 * @param poff Pointer to the variable with the code buffer position.
5432 * This will be updated if we need to move a variable from
5433 * register to stack in order to satisfy the request.
5434 * @param fRegMask Mask of acceptable registers.
5435 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5436 * registers (@c true, default) or the other way around
5437 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5438 */
5439DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5440 bool fPreferVolatile /*= true*/)
5441{
5442 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5443 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5444
5445 /*
5446 * Try find a completely unused register, preferably a call-volatile one.
5447 */
5448 uint8_t idxSimdReg;
5449 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5450 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5451 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5452 & fRegMask;
5453 if (fRegs)
5454 {
5455 if (fPreferVolatile)
5456 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5457 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5458 else
5459 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5460 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5461 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5462 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5463
5464 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5465 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5466 }
5467 else
5468 {
5469 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5470 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5471 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5472 }
5473
5474 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5475 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5476}
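
/*
 * Usage sketch: restrict the allocation to non-volatile SIMD registers so the
 * value survives a later helper call, passing false for fPreferVolatile. The
 * mask is composed the same way as fRegMask in
 * iemNativeSimdRegAllocTmpForGuestSimdReg() below; note that it is only
 * non-empty on hosts whose calling convention actually has non-volatile SIMD
 * registers.
 *
 *      uint32_t const fNonVolatileMask = IEMNATIVE_HST_SIMD_REG_MASK
 *                                      & ~IEMNATIVE_SIMD_REG_FIXED_MASK
 *                                      & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
 *      uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpEx(pReNative, &off, fNonVolatileMask, false);
 */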
5477
5478
5479/**
5480 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5481 *
5482 * @param pReNative The native recompile state.
5483 * @param idxHstSimdReg The host SIMD register to update the state for.
5484 * @param enmLoadSz The load size to set.
5485 */
5486DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5487 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5488{
5489 /* Everything valid already? -> nothing to do. */
5490 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5491 return;
5492
5493 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5494 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5495 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5496 {
5497 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5498 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5499 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5500 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5501 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5502 }
5503}
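
/*
 * The enmLoaded merging above, spelled out (current state + new load => result):
 *
 *      256      + anything => 256 (already fully valid, early return)
 *      Invalid  + X        => X
 *      Low128   + Low128   => Low128 (no change)
 *      Low128   + High128  => 256
 *      High128  + Low128   => 256
 *      (other combinations are not expected and are caught by the Assert)
 */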
5504
5505
5506static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5507 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5508{
5509 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5510 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5511 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5512 {
5513#ifdef RT_ARCH_ARM64
5514 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5515 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5516#endif
5517
5518 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5519 {
5520 switch (enmLoadSzDst)
5521 {
5522 case kIemNativeGstSimdRegLdStSz_256:
5523 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5524 break;
5525 case kIemNativeGstSimdRegLdStSz_Low128:
5526 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5527 break;
5528 case kIemNativeGstSimdRegLdStSz_High128:
5529 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5530 break;
5531 default:
5532 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5533 }
5534
5535 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5536 }
5537 }
5538 else
5539 {
5540 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5541 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5542 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5543 }
5544
5545 return off;
5546}
5547
5548
5549/**
5550 * Allocates a temporary host SIMD register for keeping a guest
5551 * SIMD register value.
5552 *
5553 * Since we may already have a register holding the guest register value,
5554 * code will be emitted to do the loading if that's not the case. Code may also
5555 * be emitted if we have to free up a register to satisfy the request.
5556 *
5557 * @returns The host register number; throws VBox status code on failure, so no
5558 * need to check the return value.
5559 * @param pReNative The native recompile state.
5560 * @param poff Pointer to the variable with the code buffer
5561 *                      position. This will be updated if we need to move a
5562 * variable from register to stack in order to satisfy
5563 * the request.
5564 * @param   enmGstSimdReg   The guest SIMD register that is to be updated.
5565 * @param enmLoadSz Load/store size.
5566 * @param enmIntendedUse How the caller will be using the host register.
5567 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5568 * register is okay (default). The ASSUMPTION here is
5569 * that the caller has already flushed all volatile
5570 * registers, so this is only applied if we allocate a
5571 * new register.
5572 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5573 */
5574DECL_HIDDEN_THROW(uint8_t)
5575iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5576 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz,
5577 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5578 bool fNoVolatileRegs /*= false*/)
5579{
5580 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5581#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5582 AssertMsg( pReNative->idxCurCall == 0
5583 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5584 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5585 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5586 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5587 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5588 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5589#endif
5590#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5591 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5592#endif
5593 uint32_t const fRegMask = !fNoVolatileRegs
5594 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5595 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5596
5597 /*
5598 * First check if the guest register value is already in a host register.
5599 */
5600 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5601 {
5602 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5603 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5604 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5605 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5606
5607 /* It's not supposed to be allocated... */
5608 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5609 {
5610 /*
5611 * If the register will trash the guest shadow copy, try find a
5612 * completely unused register we can use instead. If that fails,
5613 * we need to disassociate the host reg from the guest reg.
5614 */
5615 /** @todo would be nice to know if preserving the register is in any way helpful. */
5616            /* If the purpose is calculations, try to duplicate the register value as
5617 we'll be clobbering the shadow. */
5618 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5619 && ( ~pReNative->Core.bmHstSimdRegs
5620 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5621 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5622 {
5623 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5624
5625 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5626
5627 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5628 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5629 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5630 idxSimdReg = idxRegNew;
5631 }
5632 /* If the current register matches the restrictions, go ahead and allocate
5633 it for the caller. */
5634 else if (fRegMask & RT_BIT_32(idxSimdReg))
5635 {
5636 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5637 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5638 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5639 {
5640 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5641 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5642 else
5643 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5644 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5645 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5646 }
5647 else
5648 {
5649 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5650 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5651 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5652 }
5653 }
5654 /* Otherwise, allocate a register that satisfies the caller and transfer
5655 the shadowing if compatible with the intended use. (This basically
5656 means the call wants a non-volatile register (RSP push/pop scenario).) */
5657 else
5658 {
5659 Assert(fNoVolatileRegs);
5660 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5661 !fNoVolatileRegs
5662 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5663 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5664 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5665 {
5666 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5667                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5668 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5669 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5670 }
5671 else
5672 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5673 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5674 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5675 idxSimdReg = idxRegNew;
5676 }
5677 }
5678 else
5679 {
5680 /*
5681 * Oops. Shadowed guest register already allocated!
5682 *
5683 * Allocate a new register, copy the value and, if updating, the
5684 * guest shadow copy assignment to the new register.
5685 */
5686 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5687 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5688 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5689 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5690
5691 /** @todo share register for readonly access. */
5692 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5693 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5694
5695 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5696 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5697 else
5698 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5699
5700 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5701 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5702 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5703 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5704 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5705 else
5706 {
5707 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5708 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5709 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5710 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5711 }
5712 idxSimdReg = idxRegNew;
5713 }
5714 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5715
5716#ifdef VBOX_STRICT
5717 /* Strict builds: Check that the value is correct. */
5718 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5719 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5720#endif
5721
5722 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5723 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5724 {
5725#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5726 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5727 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5728#endif
5729
5730 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5731 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5732 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5733 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5734 else
5735 {
5736 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5737 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5738 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5739 }
5740 }
5741
5742 return idxSimdReg;
5743 }
5744
5745 /*
5746     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5747 */
5748 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5749
5750 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5751 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5752 else
5753 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5754
5755 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5756 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5757
5758 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5759 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5760 {
5761#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5762 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5763 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5764#endif
5765
5766 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5767 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5768 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5769 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5770 else
5771 {
5772 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5773 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5774 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5775 }
5776 }
5777
5778    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5779 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5780
5781 return idxRegNew;
5782}
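/*
 * Illustrative usage sketch (not verbatim from any caller; IEMNATIVEGSTSIMDREG_SIMD,
 * iemNativeSimdRegFreeTmp and the iXRegDst index are assumed placeholders):
 *
 *      uint8_t const idxHstSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                            IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
 *                                                                            kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                            kIemNativeGstRegUse_ForUpdate);
 *      // ... emit the SIMD operation on idxHstSimdReg ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxHstSimdReg);
 *
 * The ForUpdate use marks the low 128 bits dirty, so the delayed writeback
 * machinery flushes them to CPUMCTX later.
 */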
5783
5784
5785/**
5786 * Flushes guest SIMD register shadow copies held by a set of host registers.
5787 *
5788 * This is used whenever we call an external helper, to ensure that we don't carry on
5789 * with any guest shadows in volatile registers, as these will get clobbered by the helper.
5790 *
5791 * @param pReNative The native recompile state.
5792 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5793 */
5794DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5795{
5796 /*
5797 * Reduce the mask by what's currently shadowed.
5798 */
5799 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5800 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5801 if (fHstSimdRegs)
5802 {
5803 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5804 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5805 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5806 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5807 if (bmHstSimdRegsWithGstShadowNew)
5808 {
5809 /*
5810 * Partial (likely).
5811 */
5812 uint64_t fGstShadows = 0;
5813 do
5814 {
5815 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5816 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5817 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5818 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5819 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5820 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5821
5822 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5823 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5824 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5825 } while (fHstSimdRegs != 0);
5826 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5827 }
5828 else
5829 {
5830 /*
5831 * Clear all.
5832 */
5833 do
5834 {
5835 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5836 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5837 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5838 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5839 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5840 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5841
5842 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5843 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5844 } while (fHstSimdRegs != 0);
5845 pReNative->Core.bmGstSimdRegShadows = 0;
5846 }
5847 }
5848}
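/*
 * Illustrative sketch (not verbatim from any caller): before an external helper
 * call, dirty shadows are written back first and only then is the shadow
 * bookkeeping for the volatile registers dropped:
 *
 *      off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, UINT64_MAX);
 *      iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 *
 * The helper may then clobber the volatile SIMD registers without losing any
 * guest state.
 */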
5849
5850
5851
5852/*********************************************************************************************************************************
5853* Code emitters for flushing pending guest register writes and sanity checks *
5854*********************************************************************************************************************************/
5855
5856#ifdef VBOX_STRICT
5857/**
5858 * Does internal register allocator sanity checks.
5859 */
5860DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5861{
5862 /*
5863 * Iterate host registers building a guest shadowing set.
5864 */
5865 uint64_t bmGstRegShadows = 0;
5866 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5867 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5868 while (bmHstRegsWithGstShadow)
5869 {
5870 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5871 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5872 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5873
5874 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5875 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5876 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5877 bmGstRegShadows |= fThisGstRegShadows;
5878 while (fThisGstRegShadows)
5879 {
5880 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5881 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5882 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5883 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5884 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5885 }
5886 }
5887 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5888 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5889 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5890
5891 /*
5892 * Now the other way around, checking the guest to host index array.
5893 */
5894 bmHstRegsWithGstShadow = 0;
5895 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5896 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5897 while (bmGstRegShadows)
5898 {
5899 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5900 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5901 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5902
5903 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5904 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5905 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5906 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5907 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5908 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5909 }
5910 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5911 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5912 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5913}
5914#endif /* VBOX_STRICT */
5915
5916
5917/**
5918 * Flushes any delayed guest register writes.
5919 *
5920 * This must be called prior to calling CImpl functions and any helpers that use
5921 * the guest state (like raising exceptions) and such.
5922 *
5923 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5924 * the caller if it wishes to do so.
5925 */
5926DECL_HIDDEN_THROW(uint32_t)
5927iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5928{
5929#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5930 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5931 off = iemNativeEmitPcWriteback(pReNative, off);
5932#else
5933 RT_NOREF(pReNative, fGstShwExcept);
5934#endif
5935
5936#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5937 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5938#endif
5939
5940 return iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5941}
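/*
 * Illustrative note (sketch only): callers normally go through the inline
 * iemNativeRegFlushPendingWrites() wrapper, which only ends up in this slow
 * path when something is actually pending:
 *
 *      off = iemNativeRegFlushPendingWrites(pReNative, off);
 *      // CPUMCTX is now consistent; safe to emit a CImpl/helper call.
 */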
5942
5943#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5944
5945# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5946
5947/**
5948 * Checks if the value in @a idxPcReg matches IEMCPU::uPcUpdatingDebug.
5949 */
5950DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheckWithReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxPcReg)
5951{
5952 Assert(idxPcReg != IEMNATIVE_REG_FIXED_TMP0);
5953 Assert(pReNative->Core.fDebugPcInitialized);
5954
5955 /* cmp [pVCpu->iem.s.uPcUpdatingDebug], pcreg */
5956# ifdef RT_ARCH_AMD64
5957 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5958 pCodeBuf[off++] = X86_OP_REX_W | (idxPcReg >= 8 ? X86_OP_REX_R : 0);
5959 pCodeBuf[off++] = 0x3b;
5960 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxPcReg & 7, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5961# else
5962 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5963 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5964 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, idxPcReg);
5965# endif
5966
5967 uint32_t offFixup = off;
5968 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, kIemNativeInstrCond_e);
5969 off = iemNativeEmitBrkEx(pCodeBuf, off, UINT32_C(0x2200));
5970 iemNativeFixupFixedJump(pReNative, offFixup, off);
5971
5972 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5973 return off;
5974}
5975
5976
5977/**
5978 * Checks that the current RIP+offPc matches IEMCPU::uPcUpdatingDebug.
5979 */
5980DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5981{
5982 if (pReNative->Core.fDebugPcInitialized)
5983 {
5984 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc);
5985 if (pReNative->Core.offPc)
5986 {
5987 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5988 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 8);
5989 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxTmpReg, idxPcReg, pReNative->Core.offPc);
5990 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5991 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxTmpReg);
5992 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5993 }
5994 else
5995 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
5996 iemNativeRegFreeTmp(pReNative, idxPcReg);
5997 }
5998 return off;
5999}
6000
6001# endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG */
6002
6003/**
6004 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
6005 */
6006DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6007{
6008 Assert(pReNative->Core.offPc);
6009# if !defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && !defined(VBOX_WITH_STATISTICS)
6010 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x\n", pReNative->Core.offPc, off));
6011# else
6012 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
6013 uint8_t idxCurCall = pReNative->idxCurCall;
6014 uint8_t idxInstr = pReNative->pTbOrg->Thrd.paCalls[idxCurCall].idxInstr; /* unreliable*/
6015 while (idxInstr == 0 && idxInstr + 1 < idxOldInstrPlusOne && idxCurCall > 0)
6016 idxInstr = pReNative->pTbOrg->Thrd.paCalls[--idxCurCall].idxInstr;
6017 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
6018 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
6019 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u\n",
6020 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped));
6021
6022 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
6023
6024# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6025 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6026 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
6027# endif
6028# endif
6029
6030# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6031 /* Allocate a temporary PC register. */
6032 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6033
6034 /* Perform the addition and store the result. */
6035 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6036 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
6037# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6038 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
6039# endif
6040
6041 /* Free but don't flush the PC register. */
6042 iemNativeRegFreeTmp(pReNative, idxPcReg);
6043# else
6044    /* Compare the shadow with the context value; they should match. */
6045 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6046 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6047# endif
6048
6049 pReNative->Core.offPc = 0;
6050
6051 return off;
6052}
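/*
 * Conceptual sketch (not actual code) of what the emitted writeback does at
 * runtime for the delayed PC update:
 *
 *      pVCpu->cpum.GstCtx.rip += offPc;    // sum of the skipped per-instruction advances
 *
 * offPc itself is a compile-time accumulator and is reset to zero right above.
 */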
6053
6054#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6055
6056
6057/*********************************************************************************************************************************
6058* Code Emitters (larger snippets) *
6059*********************************************************************************************************************************/
6060
6061/**
6062 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6063 * extending to 64-bit width.
6064 *
6065 * @returns New code buffer offset on success, UINT32_MAX on failure.
6066 * @param   pReNative       The native recompile state.
6067 * @param off The current code buffer position.
6068 * @param idxHstReg The host register to load the guest register value into.
6069 * @param enmGstReg The guest register to load.
6070 *
6071 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6072 * that is something the caller needs to do if applicable.
6073 */
6074DECL_HIDDEN_THROW(uint32_t)
6075iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6076{
6077 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6078 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6079
6080 switch (g_aGstShadowInfo[enmGstReg].cb)
6081 {
6082 case sizeof(uint64_t):
6083 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6084 case sizeof(uint32_t):
6085 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6086 case sizeof(uint16_t):
6087 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6088#if 0 /* not present in the table. */
6089 case sizeof(uint8_t):
6090 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6091#endif
6092 default:
6093 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6094 }
6095}
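/*
 * Illustrative usage sketch (not verbatim from any caller): loading a guest
 * register into an already chosen host register without touching the shadow
 * bookkeeping, e.g. for a one-off check:
 *
 *      off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);
 *
 * Narrower CPUMCTX fields (16/32-bit) are zero extended to 64 bits by the
 * iemNativeEmitLoadGprFromVCpuU16/U32 helpers used above.
 */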
6096
6097
6098/**
6099 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6100 * extending to 64-bit width, extended version.
6101 *
6102 * @returns New code buffer offset on success, UINT32_MAX on failure.
6103 * @param pCodeBuf The code buffer.
6104 * @param off The current code buffer position.
6105 * @param idxHstReg The host register to load the guest register value into.
6106 * @param enmGstReg The guest register to load.
6107 *
6108 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6109 * that is something the caller needs to do if applicable.
6110 */
6111DECL_HIDDEN_THROW(uint32_t)
6112iemNativeEmitLoadGprWithGstShadowRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6113{
6114 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6115 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6116
6117 switch (g_aGstShadowInfo[enmGstReg].cb)
6118 {
6119 case sizeof(uint64_t):
6120 return iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6121 case sizeof(uint32_t):
6122 return iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6123 case sizeof(uint16_t):
6124 return iemNativeEmitLoadGprFromVCpuU16Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6125#if 0 /* not present in the table. */
6126 case sizeof(uint8_t):
6127 return iemNativeEmitLoadGprFromVCpuU8Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6128#endif
6129 default:
6130#ifdef IEM_WITH_THROW_CATCH
6131 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6132#else
6133 AssertReleaseFailedReturn(off);
6134#endif
6135 }
6136}
6137
6138
6139/**
6140 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6141 *
6142 * @returns New code buffer offset on success, UINT32_MAX on failure.
6143 * @param pReNative The recompiler state.
6144 * @param off The current code buffer position.
6145 * @param idxHstSimdReg The host register to load the guest register value into.
6146 * @param enmGstSimdReg The guest register to load.
6147 * @param enmLoadSz The load size of the register.
6148 *
6149 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6150 * that is something the caller needs to do if applicable.
6151 */
6152DECL_HIDDEN_THROW(uint32_t)
6153iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6154 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6155{
6156 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6157
6158 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6159 switch (enmLoadSz)
6160 {
6161 case kIemNativeGstSimdRegLdStSz_256:
6162 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6163 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6164 case kIemNativeGstSimdRegLdStSz_Low128:
6165 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6166 case kIemNativeGstSimdRegLdStSz_High128:
6167 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6168 default:
6169 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6170 }
6171}
6172
6173#ifdef VBOX_STRICT
6174
6175/**
6176 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6177 *
6178 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6179 * Trashes EFLAGS on AMD64.
6180 */
6181DECL_FORCE_INLINE(uint32_t)
6182iemNativeEmitTop32BitsClearCheckEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxReg)
6183{
6184# ifdef RT_ARCH_AMD64
6185 /* rol reg64, 32 */
6186 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6187 pCodeBuf[off++] = 0xc1;
6188 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6189 pCodeBuf[off++] = 32;
6190
6191 /* test reg32, ffffffffh */
6192 if (idxReg >= 8)
6193 pCodeBuf[off++] = X86_OP_REX_B;
6194 pCodeBuf[off++] = 0xf7;
6195 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6196 pCodeBuf[off++] = 0xff;
6197 pCodeBuf[off++] = 0xff;
6198 pCodeBuf[off++] = 0xff;
6199 pCodeBuf[off++] = 0xff;
6200
6201 /* je/jz +1 */
6202 pCodeBuf[off++] = 0x74;
6203 pCodeBuf[off++] = 0x01;
6204
6205 /* int3 */
6206 pCodeBuf[off++] = 0xcc;
6207
6208 /* rol reg64, 32 */
6209 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6210 pCodeBuf[off++] = 0xc1;
6211 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6212 pCodeBuf[off++] = 32;
6213
6214# elif defined(RT_ARCH_ARM64)
6215 /* lsr tmp0, reg64, #32 */
6216 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6217 /* cbz tmp0, +1 */
6218 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6219 /* brk #0x1100 */
6220 pCodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6221
6222# else
6223# error "Port me!"
6224# endif
6225 return off;
6226}
6227
6228
6229/**
6230 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6231 *
6232 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6233 * Trashes EFLAGS on AMD64.
6234 */
6235DECL_HIDDEN_THROW(uint32_t)
6236iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6237{
6238# ifdef RT_ARCH_AMD64
6239 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6240# elif defined(RT_ARCH_ARM64)
6241 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6242# else
6243# error "Port me!"
6244# endif
6245 off = iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6246 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6247 return off;
6248}
6249
6250
6251/**
6252 * Emitting code that checks that the content of register @a idxReg is the same
6253 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6254 * instruction if that's not the case.
6255 *
6256 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6257 * Trashes EFLAGS on AMD64.
6258 */
6259DECL_HIDDEN_THROW(uint32_t) iemNativeEmitGuestRegValueCheckEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf,
6260 uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6261{
6262#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6263    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6264 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
6265 return off;
6266#endif
6267
6268# ifdef RT_ARCH_AMD64
6269 /* cmp reg, [mem] */
6270 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6271 {
6272 if (idxReg >= 8)
6273 pCodeBuf[off++] = X86_OP_REX_R;
6274 pCodeBuf[off++] = 0x38;
6275 }
6276 else
6277 {
6278 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6279 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6280 else
6281 {
6282 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6283 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6284 else
6285 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6286 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6287 if (idxReg >= 8)
6288 pCodeBuf[off++] = X86_OP_REX_R;
6289 }
6290 pCodeBuf[off++] = 0x39;
6291 }
6292 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6293
6294 /* je/jz +1 */
6295 pCodeBuf[off++] = 0x74;
6296 pCodeBuf[off++] = 0x01;
6297
6298 /* int3 */
6299 pCodeBuf[off++] = 0xcc;
6300
6301 /* For values smaller than the register size, we must check that the rest
6302 of the register is all zeros. */
6303 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6304 {
6305 /* test reg64, imm32 */
6306 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6307 pCodeBuf[off++] = 0xf7;
6308 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6309 pCodeBuf[off++] = 0;
6310 pCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6311 pCodeBuf[off++] = 0xff;
6312 pCodeBuf[off++] = 0xff;
6313
6314 /* je/jz +1 */
6315 pCodeBuf[off++] = 0x74;
6316 pCodeBuf[off++] = 0x01;
6317
6318 /* int3 */
6319 pCodeBuf[off++] = 0xcc;
6320 }
6321 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6322 iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6323 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6324
6325# elif defined(RT_ARCH_ARM64)
6326 /* mov TMP0, [gstreg] */
6327 off = iemNativeEmitLoadGprWithGstShadowRegEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6328
6329 /* sub tmp0, tmp0, idxReg */
6330 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6331 /* cbz tmp0, +2 */
6332 pCodeBuf[off++] = Armv8A64MkInstrCbz(2, IEMNATIVE_REG_FIXED_TMP0);
6333 /* brk #0x1000+enmGstReg */
6334 pCodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6335 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6336
6337# else
6338# error "Port me!"
6339# endif
6340 return off;
6341}
6342
6343
6344/**
6345 * Emitting code that checks that the content of register @a idxReg is the same
6346 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6347 * instruction if that's not the case.
6348 *
6349 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6350 * Trashes EFLAGS on AMD64.
6351 */
6352DECL_HIDDEN_THROW(uint32_t)
6353iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6354{
6355#ifdef RT_ARCH_AMD64
6356 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6357#elif defined(RT_ARCH_ARM64)
6358 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6359# else
6360# error "Port me!"
6361# endif
6362 return iemNativeEmitGuestRegValueCheckEx(pReNative, pCodeBuf, off, idxReg, enmGstReg);
6363}
6364
6365# ifdef RT_ARCH_AMD64
6366/**
6367 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
6368 * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
6369DECL_FORCE_INLINE_THROW(uint32_t)
6370iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6371{
6372 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6373 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6374 if (idxSimdReg >= 8)
6375 pbCodeBuf[off++] = X86_OP_REX_R;
6376 pbCodeBuf[off++] = 0x0f;
6377 pbCodeBuf[off++] = 0x38;
6378 pbCodeBuf[off++] = 0x29;
6379 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6380
6381 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6382 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6383 pbCodeBuf[off++] = X86_OP_REX_W
6384 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6385 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6386 pbCodeBuf[off++] = 0x0f;
6387 pbCodeBuf[off++] = 0x3a;
6388 pbCodeBuf[off++] = 0x16;
6389 pbCodeBuf[off++] = 0xeb;
6390 pbCodeBuf[off++] = 0x00;
6391
6392 /* cmp tmp0, 0xffffffffffffffff. */
6393 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6394 pbCodeBuf[off++] = 0x83;
6395 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6396 pbCodeBuf[off++] = 0xff;
6397
6398 /* je/jz +1 */
6399 pbCodeBuf[off++] = 0x74;
6400 pbCodeBuf[off++] = 0x01;
6401
6402 /* int3 */
6403 pbCodeBuf[off++] = 0xcc;
6404
6405 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6406 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6407 pbCodeBuf[off++] = X86_OP_REX_W
6408 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6409 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6410 pbCodeBuf[off++] = 0x0f;
6411 pbCodeBuf[off++] = 0x3a;
6412 pbCodeBuf[off++] = 0x16;
6413 pbCodeBuf[off++] = 0xeb;
6414 pbCodeBuf[off++] = 0x01;
6415
6416 /* cmp tmp0, 0xffffffffffffffff. */
6417 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6418 pbCodeBuf[off++] = 0x83;
6419 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6420 pbCodeBuf[off++] = 0xff;
6421
6422 /* je/jz +1 */
6423 pbCodeBuf[off++] = 0x74;
6424 pbCodeBuf[off++] = 0x01;
6425
6426 /* int3 */
6427 pbCodeBuf[off++] = 0xcc;
6428
6429 return off;
6430}
6431# endif /* RT_ARCH_AMD64 */
6432
6433
6434/**
6435 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6436 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6437 * instruction if that's not the case.
6438 *
6439 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6440 * Trashes EFLAGS on AMD64.
6441 */
6442DECL_HIDDEN_THROW(uint32_t)
6443iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6444 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6445{
6446    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6447 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6448 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6449 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6450 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6451 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6452 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6453 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6454 return off;
6455
6456# ifdef RT_ARCH_AMD64
6457 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6458 {
6459 /* movdqa vectmp0, idxSimdReg */
6460 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6461
6462 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6463
6464 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6465 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6466 }
6467
6468 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6469 {
6470        /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6471 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6472
6473 /* vextracti128 vectmp0, idxSimdReg, 1 */
6474 pbCodeBuf[off++] = X86_OP_VEX3;
6475 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6476 | X86_OP_VEX3_BYTE1_X
6477 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6478 | 0x03; /* Opcode map */
6479 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6480 pbCodeBuf[off++] = 0x39;
6481 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6482 pbCodeBuf[off++] = 0x01;
6483
6484 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6485 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6486 }
6487
6488# elif defined(RT_ARCH_ARM64)
6489 /* mov vectmp0, [gstreg] */
6490 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6491
6492 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6493 {
6494 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6495 /* eor vectmp0, vectmp0, idxSimdReg */
6496 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6497 /* uaddlv vectmp0, vectmp0.16B */
6498 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6499 /* umov tmp0, vectmp0.H[0] */
6500 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6501 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6502 /* cbz tmp0, +1 */
6503 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6504 /* brk #0x1000+enmGstReg */
6505 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6506 }
6507
6508 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6509 {
6510 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6511 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6512 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6513 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6514 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6515 /* umov tmp0, (vectmp0 + 1).H[0] */
6516 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6517 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6518 /* cbz tmp0, +1 */
6519 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6520 /* brk #0x1000+enmGstReg */
6521 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6522 }
6523
6524# else
6525# error "Port me!"
6526# endif
6527
6528 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6529 return off;
6530}
6531
6532
6533/**
6534 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6535 * important bits.
6536 *
6537 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6538 * Trashes EFLAGS on AMD64.
6539 */
6540DECL_HIDDEN_THROW(uint32_t)
6541iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6542{
6543 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6544 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6545 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6546 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6547
6548# ifdef RT_ARCH_AMD64
6549 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6550
6551 /* je/jz +1 */
6552 pbCodeBuf[off++] = 0x74;
6553 pbCodeBuf[off++] = 0x01;
6554
6555 /* int3 */
6556 pbCodeBuf[off++] = 0xcc;
6557
6558# elif defined(RT_ARCH_ARM64)
6559 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6560
6561 /* b.eq +1 */
6562 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6563 /* brk #0x2000 */
6564 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6565
6566# else
6567# error "Port me!"
6568# endif
6569 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6570
6571 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6572 return off;
6573}
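/*
 * Conceptual sketch (not actual code) of the check emitted above:
 *
 *      if (   (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
 *          != (fExec & IEMTB_F_KEY_MASK))
 *          breakpoint;     // int3 / brk #0x2000
 *
 * i.e. the TB must only run in the execution mode it was compiled for.
 */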
6574
6575#endif /* VBOX_STRICT */
6576
6577
6578#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6579/**
6580 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6581 */
6582DECL_HIDDEN_THROW(uint32_t)
6583iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6584{
6585 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6586
6587 fEflNeeded &= X86_EFL_STATUS_BITS;
6588 if (fEflNeeded)
6589 {
6590# ifdef RT_ARCH_AMD64
6591 /* test dword [pVCpu + offVCpu], imm32 */
6592 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
6593 if (fEflNeeded <= 0xff)
6594 {
6595 pCodeBuf[off++] = 0xf6;
6596 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6597 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6598 }
6599 else
6600 {
6601 pCodeBuf[off++] = 0xf7;
6602 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6603 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6604 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6605 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6606 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6607 }
6608
6609 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 3, kIemNativeInstrCond_e);
6610 pCodeBuf[off++] = 0xcc;
6611
6612 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6613
6614# else
6615 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6616 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6617 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6618# ifdef RT_ARCH_ARM64
6619 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6620 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6621# else
6622# error "Port me!"
6623# endif
6624 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6625# endif
6626 }
6627 return off;
6628}
6629#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
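/*
 * Conceptual sketch (not actual code) of the strict check emitted by
 * iemNativeEmitEFlagsSkippingCheck():
 *
 *      if (pVCpu->iem.s.fSkippingEFlags & fEflNeeded & X86_EFL_STATUS_BITS)
 *          breakpoint;     // int3 / brk #0x7777
 *
 * i.e. none of the status flags this code depends on may have had their
 * calculation skipped earlier in the TB.
 */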
6630
6631
6632/**
6633 * Emits code for checking the return code of a call and rcPassUp, exiting
6634 * the TB via the NonZeroRetOrPassUp path if either is non-zero.
6635 */
6636DECL_HIDDEN_THROW(uint32_t)
6637iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6638{
6639#ifdef RT_ARCH_AMD64
6640 /*
6641 * AMD64: eax = call status code.
6642 */
6643
6644 /* edx = rcPassUp */
6645 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6646# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6647 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6648# endif
6649
6650 /* edx = eax | rcPassUp */
6651 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6652 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6653 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6654 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6655
6656 /* Jump to non-zero status return path. */
6657 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, off);
6658
6659 /* done. */
6660
6661#elif RT_ARCH_ARM64
6662 /*
6663 * ARM64: w0 = call status code.
6664 */
6665 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+3+3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
6666
6667# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6668 AssertCompile(ARMV8_A64_REG_X2 == IEMNATIVE_CALL_ARG2_GREG);
6669 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, ARMV8_A64_REG_X2, idxInstr);
6670# endif
6671 off = iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6672
6673 pCodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6674
6675 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, pCodeBuf, off,
6676 ARMV8_A64_REG_X4, true /*f64Bit*/);
6677
6678#else
6679# error "port me"
6680#endif
6681 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6682 RT_NOREF_PV(idxInstr);
6683 return off;
6684}
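/*
 * Conceptual sketch (not actual code) of what the sequence emitted above does
 * at runtime:
 *
 *      if (((uint32_t)rcStrict | (uint32_t)pVCpu->iem.s.rcPassUp) != 0)
 *          goto NonZeroRetOrPassUp;    // common TB exit code
 *
 * where rcStrict is the status just returned by the call in eax / w0.
 */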
6685
6686
6687/**
6688 * Emits a call to a CImpl function or something similar.
6689 */
6690DECL_HIDDEN_THROW(uint32_t)
6691iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6692 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6693{
6694 /* Writeback everything. */
6695 off = iemNativeRegFlushPendingWrites(pReNative, off);
6696
6697 /*
6698     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6699 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6700 */
6701 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6702 fGstShwFlush
6703 | RT_BIT_64(kIemNativeGstReg_Pc)
6704 | RT_BIT_64(kIemNativeGstReg_EFlags));
6705 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6706
6707 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6708
6709 /*
6710 * Load the parameters.
6711 */
6712#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
6713    /* Special case: the hidden VBOXSTRICTRC return pointer takes the first argument register. */
6714 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6715 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6716 if (cAddParams > 0)
6717 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6718 if (cAddParams > 1)
6719# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6720 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam1);
6721# else
6722 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6723# endif
6724 if (cAddParams > 2)
6725# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 6
6726 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG5_GREG, uParam2);
6727# else
6728 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6729# endif
6730 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict */
6731
6732#else
6733 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6734 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6735 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6736 if (cAddParams > 0)
6737 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6738 if (cAddParams > 1)
6739 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6740 if (cAddParams > 2)
6741# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6742 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6743# else
6744 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6745# endif
6746#endif
6747
6748 /*
6749 * Make the call.
6750 */
6751 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6752
6753#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
6754 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict (see above) */
6755#endif
6756
6757#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6758 pReNative->Core.fDebugPcInitialized = false;
6759 Log4(("fDebugPcInitialized=false cimpl off=%#x (v2)\n", off));
6760#endif
6761
6762 /*
6763 * Check the status code.
6764 */
6765 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6766}
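/*
 * Conceptual sketch (not actual code) of the call emitted above, ignoring the
 * Windows VBOXSTRICTRC indirection:
 *
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, uParam0, uParam1, uParam2);  // only cAddParams of the uParamN are loaded
 *
 * followed by the status/rcPassUp check emitted by
 * iemNativeEmitCheckCallRetAndPassUp().
 */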
6767
6768
6769/**
6770 * Emits a call to a threaded worker function.
6771 */
6772DECL_HIDDEN_THROW(uint32_t)
6773iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6774{
6775 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
6776 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6777
6778 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6779 off = iemNativeRegFlushPendingWrites(pReNative, off);
6780
6781 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6782 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6783
6784#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6785 /* The threaded function may throw / long jmp, so set current instruction
6786 number if we're counting. */
6787 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6788#endif
6789
6790 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6791
6792#ifdef RT_ARCH_AMD64
6793 /* Load the parameters and emit the call. */
6794# ifdef RT_OS_WINDOWS
6795# ifndef VBOXSTRICTRC_STRICT_ENABLED
6796 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6797 if (cParams > 0)
6798 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6799 if (cParams > 1)
6800 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6801 if (cParams > 2)
6802 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6803# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6804 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6805 if (cParams > 0)
6806 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6807 if (cParams > 1)
6808 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6809 if (cParams > 2)
6810 {
6811 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6812 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6813 }
6814 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict */
6815# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6816# else
6817 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6818 if (cParams > 0)
6819 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6820 if (cParams > 1)
6821 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6822 if (cParams > 2)
6823 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6824# endif
6825
6826 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6827
6828# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6829 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict (see above) */
6830# endif
6831
6832#elif RT_ARCH_ARM64
6833 /*
6834 * ARM64:
6835 */
6836# if !defined(RT_OS_WINDOWS) || !defined(VBOXSTRICTRC_STRICT_ENABLED)
6837 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6838 if (cParams > 0)
6839 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6840 if (cParams > 1)
6841 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6842 if (cParams > 2)
6843 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6844# else
6845 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6846 if (cParams > 0)
6847 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[0]);
6848 if (cParams > 1)
6849 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[1]);
6850 if (cParams > 2)
6851 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, pCallEntry->auParams[2]);
6852 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict */
6853# endif
6854
6855 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6856
6857# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6858 off = iemNativeEmitLoadGprByBpU32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict (see above) */
6859# endif
6860
6861#else
6862# error "port me"
6863#endif
6864
6865#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6866 pReNative->Core.fDebugPcInitialized = false;
6867 Log4(("fDebugPcInitialized=false todo off=%#x (v2)\n", off));
6868#endif
6869
6870 /*
6871 * Check the status code.
6872 */
6873 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6874
6875 return off;
6876}
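
/*
 * Note on the dispatch above: the threaded function receives pVCpu in the first
 * argument register and up to three immediates from pCallEntry->auParams after it:
 *      - AMD64 / SysV:     rdi, rsi, rdx, rcx.
 *      - AMD64 / Windows:  rcx, rdx, r8, r9; when VBOXSTRICTRC is returned via a
 *                          hidden parameter everything shifts by one register, the
 *                          last parameter goes on the stack and rcx points at the
 *                          rcStrict buffer in the frame.
 *      - ARM64:            IEMNATIVE_CALL_ARG0_GREG thru ARG3_GREG, or ARG1 thru
 *                          ARG4 with ARG0 pointing at rcStrict in the hidden
 *                          VBOXSTRICTRC parameter case.
 */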
6877
6878
6879/**
6880 * The default liveness function, matching iemNativeEmitThreadedCall.
6881 */
6882IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_ThreadedCall)
6883{
6884 IEM_LIVENESS_RAW_INIT_WITH_CALL(pOutgoing, pIncoming);
6885 RT_NOREF(pCallEntry);
6886}
6887
6888#ifdef VBOX_WITH_STATISTICS
6889
6890/**
6891 * Emits code to update the thread call statistics.
6892 */
6893DECL_INLINE_THROW(uint32_t)
6894iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6895{
6896 /*
6897 * Update threaded function stats.
6898 */
6899 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6900 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6901# if defined(RT_ARCH_ARM64)
6902 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6903 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6904 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6905 iemNativeRegFreeTmp(pReNative, idxTmp1);
6906 iemNativeRegFreeTmp(pReNative, idxTmp2);
6907# else
6908 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6909# endif
6910 return off;
6911}
6912
6913
6914/**
6915 * Emits code to update the TB exit reason statistics.
6916 */
6917DECL_INLINE_THROW(uint32_t)
6918iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6919{
6920 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6921 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6922 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6923 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6924 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6925
6926 return off;
6927}
6928
6929#endif /* VBOX_WITH_STATISTICS */
6930
6931/**
6932 * Worker for iemNativeEmitViaLookupDoOne and iemNativeRecompileAttachExecMemChunkCtx.
6933 */
6934static uint32_t
6935iemNativeEmitCoreViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak, uintptr_t pfnHelper)
6936{
6937 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6938 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6939
6940 /* Jump to ReturnBreak if the return register is NULL. */
6941 off = iemNativeEmitTestIfGprIsZeroAndJmpToFixed(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6942 true /*f64Bit*/, offReturnBreak);
6943
6944 /* Okay, continue executing the next TB. */
6945 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6946 return off;
6947}
6948
6949
6950/**
6951 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6952 */
6953static uint32_t iemNativeEmitCoreReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6954{
6955 /* set the return status */
6956 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6957}
6958
6959
6960/**
6961 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6962 */
6963static uint32_t iemNativeEmitCoreReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6964{
6965 /* set the return status */
6966 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6967}
6968
6969
6970/**
6971 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6972 */
6973static uint32_t iemNativeEmitCoreReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6974{
6975 /* set the return status */
6976 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6977}
6978
6979
6980/**
6981 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6982 */
6983static uint32_t iemNativeEmitCoreRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6984{
6985 /*
6986 * Generate the rc + rcPassUp fiddling code.
6987 */
6988 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6989#ifdef RT_ARCH_AMD64
6990# ifdef RT_OS_WINDOWS
6991# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6992 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6993# endif
6994 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6995 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6996# else
6997 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6998 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6999# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7000 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7001# endif
7002# endif
7003# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7004 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7005# endif
7006
7007#else
7008 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7009 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7010 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7011#endif
7012
7013 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7014 return off;
7015}
7016
7017
7018/**
7019 * Emits a standard epilog.
7020 */
7021static uint32_t iemNativeEmitCoreEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7022{
7023 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
7024
7025 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
7026 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7027
7028 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
7029 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
7030
7031 /*
7032 * Restore registers and return.
7033 */
7034#ifdef RT_ARCH_AMD64
7035 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7036
7037    /* Reposition rsp at the r15 restore point. */
7038 pbCodeBuf[off++] = X86_OP_REX_W;
7039 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7040 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7041 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7042
7043 /* Pop non-volatile registers and return */
7044 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7045 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7046 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7047 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7048 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7049 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7050 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7051 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7052# ifdef RT_OS_WINDOWS
7053 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7054 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7055# endif
7056 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7057 pbCodeBuf[off++] = 0xc9; /* leave */
7058 pbCodeBuf[off++] = 0xc3; /* ret */
7059 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7060
7061#elif RT_ARCH_ARM64
7062 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7063
7064 /* ldp x19, x20, [sp #(IEMNATIVE_FRAME_VAR_SIZE+IEMNATIVE_FRAME_ALIGN_SIZE)]! ; Unallocate the variable space and restore x19+x20. */
7065 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_ALIGN_SIZE < 64*8);
7066 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7067 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7068 (IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_ALIGN_SIZE) / 8);
7069 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7070 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7071 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7072 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7073 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7074 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7075 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7076 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7077 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7078 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7079 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7080 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7081
7082 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7083 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7084 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7085 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7086
7087 /* retab / ret */
7088# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7089 if (1)
7090 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7091 else
7092# endif
7093 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7094
7095#else
7096# error "port me"
7097#endif
7098 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7099
7100 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
7101 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
7102
7103 return off;
7104}
7105
7106
7107
7108/*********************************************************************************************************************************
7109* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7110*********************************************************************************************************************************/
7111
7112/**
7113 * Internal worker that allocates a variable with kind set to
7114 * kIemNativeVarKind_Invalid and no current stack allocation.
7115 *
7116 * The kind will either be set by the caller or later when the variable is first
7117 * assigned a value.
7118 *
7119 * @returns Unpacked index.
7120 * @internal
7121 */
7122DECL_INLINE_THROW(uint8_t) iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7123{
7124 Assert(cbType > 0 && cbType <= 64);
7125 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7126 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7127
7128 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7129
7130 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[idxVar]; /* VS 2019 gets a bit weird on us otherwise. */
7131#if 0
7132 pVar->cbVar = cbType;
7133 pVar->enmKind = kIemNativeVarKind_Invalid;
7134 pVar->fRegAcquired = false;
7135 pVar->fSimdReg = false;
7136 pVar->idxReg = UINT8_MAX;
7137 pVar->uArgNo = UINT8_MAX;
7138 pVar->idxStackSlot = UINT8_MAX;
7139 pVar->idxReferrerVar = UINT8_MAX;
7140 pVar->u.uValue = 0;
7141#else
7142 /* Neither clang 15 nor VC++ 2019 is able to generate this from the above. */
7143 AssertCompileMemberOffset(IEMNATIVEVAR, cbVar, 1);
7144 AssertCompile((int)kIemNativeVarKind_Invalid == 0);
7145 pVar->u32Init0 = (uint32_t)cbType << 8;
7146 pVar->u32Init1 = UINT32_MAX;
7147 pVar->u.uValue = 0;
7148#endif
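    /* Note (illustrative, based on the layout implied by the #if 0 block above): the
       packed u32Init0 leaves enmKind at kIemNativeVarKind_Invalid (0), stores cbType
       in cbVar (byte offset 1, see the AssertCompileMemberOffset) and zeroes
       fRegAcquired/fSimdReg, while u32Init1 = UINT32_MAX presumably covers idxReg,
       uArgNo, idxStackSlot and idxReferrerVar, setting them all to UINT8_MAX. */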
7149 return idxVar;
7150}
7151
7152
7153/**
7154 * Internal worker that allocates an argument variable w/o setting enmKind.
7155 *
7156 * @returns Unpacked index.
7157 * @internal
7158 */
7159static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7160{
7161 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7162 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7163 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7164
7165 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7166 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7167 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7168 return idxVar;
7169}
7170
7171
7172/**
7173 * Gets the stack slot for a stack variable, allocating one if necessary.
7174 *
7175 * Calling this function implies that the stack slot will contain a valid
7176 * variable value. The caller deals with any register currently assigned to the
7177 * variable, typically by spilling it into the stack slot.
7178 *
7179 * @returns The stack slot number.
7180 * @param pReNative The recompiler state.
7181 * @param idxVar The variable.
7182 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7183 */
7184DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7185{
7186 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7187 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7188 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7189
7190 /* Already got a slot? */
7191 uint8_t const idxStackSlot = pVar->idxStackSlot;
7192 if (idxStackSlot != UINT8_MAX)
7193 {
7194 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7195 return idxStackSlot;
7196 }
7197
7198 /*
7199 * A single slot is easy to allocate.
7200 * Allocate them from the top end, closest to BP, to reduce the displacement.
7201 */
7202 if (pVar->cbVar <= sizeof(uint64_t))
7203 {
7204 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7205 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7206 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7207 pVar->idxStackSlot = (uint8_t)iSlot;
7208 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7209 return (uint8_t)iSlot;
7210 }
7211
7212 /*
7213 * We need more than one stack slot.
7214 *
7215 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7216 */
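    /* Worked example (derived from the formulas below): a 32 byte variable gives
           fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = RT_BIT_32(2) - 1 = 0x3
           fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1           = RT_BIT_32(4) - 1 = 0xf
       i.e. four consecutive slots starting at a four-slot aligned index. */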
7217 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7218 Assert(pVar->cbVar <= 64);
7219 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7220 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7221 uint32_t bmStack = pReNative->Core.bmStack;
7222 while (bmStack != UINT32_MAX)
7223 {
7224 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7225 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7226 iSlot = (iSlot - 1) & ~fBitAlignMask;
7227 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7228 {
7229 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7230 pVar->idxStackSlot = (uint8_t)iSlot;
7231 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7232 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7233 return (uint8_t)iSlot;
7234 }
7235
7236 bmStack |= (fBitAllocMask << iSlot);
7237 }
7238 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7239}
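
/*
 * Usage sketch (illustrative, not from the original source): spilling a variable's
 * host register typically pairs this with iemNativeStackCalcBpDisp() and a
 * BP-relative store, as done in iemNativeVarSaveVolatileRegsPreHlpCall() below:
 *
 *      uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
 *      off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
 */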
7240
7241
7242/**
7243 * Changes the variable to a stack variable.
7244 *
7245 * Currently this is only possible to do the first time the variable is used;
7246 * switching later can be implemented but has not been done.
7247 *
7248 * @param pReNative The recompiler state.
7249 * @param idxVar The variable.
7250 * @throws VERR_IEM_VAR_IPE_2
7251 */
7252DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7253{
7254 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7255 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7256 if (pVar->enmKind != kIemNativeVarKind_Stack)
7257 {
7258 /* We could in theory transition from immediate to stack as well, but it
7259 would involve the caller doing work storing the value on the stack. So,
7260 till that's required we only allow transition from invalid. */
7261 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7262 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7263 pVar->enmKind = kIemNativeVarKind_Stack;
7264
7265 /* Note! We don't allocate a stack slot here, that's only done when a
7266 slot is actually needed to hold a variable value. */
7267 }
7268}
7269
7270
7271/**
7272 * Sets the variable to a constant value.
7273 *
7274 * This does not require stack storage as we know the value and can always
7275 * reload it, unless of course it's referenced.
7276 *
7277 * @param pReNative The recompiler state.
7278 * @param idxVar The variable.
7279 * @param uValue The immediate value.
7280 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7281 */
7282DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7283{
7284 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7285 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7286 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7287 {
7288 /* Only simple transitions for now. */
7289 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7290 pVar->enmKind = kIemNativeVarKind_Immediate;
7291 }
7292 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7293
7294 pVar->u.uValue = uValue;
7295 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7296 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7297 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7298}
7299
7300
7301/**
7302 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7303 *
7304 * This does not require stack storage as we know the value and can always
7305 * reload it. Loading is postponed till needed.
7306 *
7307 * @param pReNative The recompiler state.
7308 * @param idxVar The variable. Unpacked.
7309 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7310 *
7311 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7312 * @internal
7313 */
7314static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7315{
7316 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7317 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7318
7319 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7320 {
7321 /* Only simple transitions for now. */
7322 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7323 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7324 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7325 }
7326 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7327
7328 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7329
7330 /* Update the other variable, ensure it's a stack variable. */
7331 /** @todo handle variables with const values... that'll go boom now. */
7332 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7333 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7334}
7335
7336
7337/**
7338 * Sets the variable to a reference (pointer) to a guest register reference.
7339 *
7340 * This does not require stack storage as we know the value and can always
7341 * reload it. Loading is postponed till needed.
7342 *
7343 * @param pReNative The recompiler state.
7344 * @param idxVar The variable.
7345 * @param enmRegClass The class guest registers to reference.
7346 * @param idxReg The register within @a enmRegClass to reference.
7347 *
7348 * @throws VERR_IEM_VAR_IPE_2
7349 */
7350DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7351 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7352{
7353 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7354 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7355
7356 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7357 {
7358 /* Only simple transitions for now. */
7359 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7360 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7361 }
7362 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7363
7364 pVar->u.GstRegRef.enmClass = enmRegClass;
7365 pVar->u.GstRegRef.idx = idxReg;
7366}
7367
7368
7369DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7370{
7371 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7372}
7373
7374
7375DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7376{
7377 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7378
7379 /* Since we're using a generic uint64_t value type, we must truncate it if
7380       the variable is smaller, otherwise we may end up with a too large value when
7381       scaling up an imm8 w/ sign-extension.
7382
7383 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7384       in the bios, bx=1) when running on arm, because clang expects 16-bit
7385 register parameters to have bits 16 and up set to zero. Instead of
7386 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7387 CF value in the result. */
7388 switch (cbType)
7389 {
7390 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7391 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7392 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7393 }
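    /* Example: with cbType == sizeof(uint16_t), a sign-extended immediate such as
       UINT64_C(0xffffffffffffffff) is reduced to 0xffff here, giving the zero-extended
       form clang expects for 16-bit register parameters (see the note above). */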
7394 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7395 return idxVar;
7396}
7397
7398
7399DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7400{
7401 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7402 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7403 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7404 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7405 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7406 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7407
7408 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7409 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7410 return idxArgVar;
7411}
7412
7413
7414DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7415{
7416 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7417 /* Don't set to stack now, leave that to the first use as for instance
7418 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7419 return idxVar;
7420}
7421
7422
7423DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7424{
7425 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7426
7427 /* Since we're using a generic uint64_t value type, we must truncate it if
7428       the variable is smaller, otherwise we may end up with a too large value when
7429       scaling up an imm8 w/ sign-extension. */
7430 switch (cbType)
7431 {
7432 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7433 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7434 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7435 }
7436 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7437 return idxVar;
7438}
7439
7440
7441DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
7442 uint8_t cbType, uint8_t idxVarOther)
7443{
7444 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7445 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7446
7447 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquireInited(pReNative, idxVarOther, poff);
7448 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7449
7450/** @todo combine MOV and AND using MOVZX/similar. */
7451 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7452
7453 /* Truncate the value to this variables size. */
7454 switch (cbType)
7455 {
7456 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7457 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7458 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7459 }
7460
7461 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7462 iemNativeVarRegisterRelease(pReNative, idxVar);
7463 return idxVar;
7464}
7465
7466
7467/**
7468 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7469 * fixed till we call iemNativeVarRegisterRelease.
7470 *
7471 * @returns The host register number.
7472 * @param pReNative The recompiler state.
7473 * @param idxVar The variable.
7474 * @param poff Pointer to the instruction buffer offset.
7475 * In case a register needs to be freed up or the value
7476 * loaded off the stack.
7477 * @param idxRegPref Preferred register number or UINT8_MAX.
7478 *
7479 * @tparam a_fInitialized Set if the variable must already have been
7480 * initialized. Will throw VERR_IEM_VAR_NOT_INITIALIZED
7481 * if this is not the case.
7482 * @tparam a_fWithRegPref If idxRegPref is valid.
7483 *
7484 * @note Must not modify the host status flags!
7485 */
7486template<bool const a_fInitialized, bool const a_fWithRegPref>
7487DECL_FORCE_INLINE_THROW(uint8_t)
7488iemNativeVarRegisterAcquireSlowInt(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff, uint8_t idxRegPref)
7489{
7490 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7491 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7492 Assert(pVar->cbVar <= 8);
7493 Assert(!pVar->fRegAcquired);
7494 Assert(!a_fWithRegPref || idxRegPref < RT_ELEMENTS(pReNative->Core.aHstRegs));
7495
7496    /* This slow code path only handles the case where no register has been
7497 allocated for the variable yet. */
7498 Assert(pVar->idxReg == UINT8_MAX);
7499
7500 /*
7501 * If the kind of variable has not yet been set, default to 'stack'.
7502 */
7503 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7504 && pVar->enmKind < kIemNativeVarKind_End);
7505 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7506 iemNativeVarSetKindToStack(pReNative, idxVar);
7507
7508 /*
7509     * We have to allocate a register for the variable, even if it's a stack one,
7510     * as we don't know if there are modifications being made to it before it's
7511     * finalized (todo: analyze and insert hints about that?).
7512     *
7513     * If we can, we try to get the correct register for argument variables. This
7514     * assumes that most argument variables are fetched as close as possible to
7515     * the actual call, so that there aren't any interfering hidden calls
7516     * (memory accesses, etc) in between.
7517     *
7518     * If we cannot, or it's a regular (non-argument) variable, we make sure no
7519     * argument registers that will be used by this MC block are allocated here,
7520     * and we always prefer non-volatile registers to avoid needing to spill
7521     * stuff for internal calls.
7522 */
7523 /** @todo Detect too early argument value fetches and warn about hidden
7524 * calls causing less optimal code to be generated in the python script. */
7525
7526 uint8_t idxReg;
7527 uint8_t const uArgNo = pVar->uArgNo;
7528 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7529 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7530 {
7531 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7532
7533#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7534 /* Writeback any dirty shadow registers we are about to unshadow. */
7535 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7536#endif
7537
7538 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7539 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7540 }
7541 else if ( !a_fWithRegPref
7542 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7543 {
7544 /** @todo there must be a better way for this and boot cArgsX? */
7545 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7546 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7547 & ~pReNative->Core.bmHstRegsWithGstShadow
7548 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7549 & fNotArgsMask;
7550 if (fRegs)
7551 {
7552 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7553 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7554 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7555 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7556 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7557 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7558 }
7559 else
7560 {
7561 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7562 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7563 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7564 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7565 }
7566 }
7567 else
7568 {
7569 idxReg = idxRegPref;
7570 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7571 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7572 }
7573 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7574 pVar->idxReg = idxReg;
7575 pVar->fSimdReg = false;
7576
7577 /*
7578 * Load it off the stack if we've got a stack slot.
7579 */
7580 uint8_t const idxStackSlot = pVar->idxStackSlot;
7581 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7582 {
7583 Assert(a_fInitialized);
7584 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7585 switch (pVar->cbVar)
7586 {
7587 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7588 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7589 case 3: AssertFailed(); RT_FALL_THRU();
7590 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7591 default: AssertFailed(); RT_FALL_THRU();
7592 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7593 }
7594 }
7595 else
7596 {
7597 Assert(idxStackSlot == UINT8_MAX);
7598 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7599 AssertStmt(!a_fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7600 else
7601 {
7602 /*
7603 * Convert from immediate to stack/register. This is currently only
7604 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7605 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7606 */
7607 AssertStmt(a_fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7608 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7609 idxVar, idxReg, pVar->u.uValue));
7610 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7611 pVar->enmKind = kIemNativeVarKind_Stack;
7612 }
7613 }
7614
7615 pVar->fRegAcquired = true;
7616 return idxReg;
7617}
7618
7619
7620/** See iemNativeVarRegisterAcquireSlowInt for details. */
7621DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquireSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
7622{
7623 /* very likely */
7624 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 0]);
7625 return iemNativeVarRegisterAcquireSlowInt<false, false>(pReNative, idxVar, poff, UINT8_MAX);
7626}
7627
7628
7629/** See iemNativeVarRegisterAcquireSlowInt for details. */
7630DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquireInitedSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
7631{
7632 /* even more likely */
7633 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 2]);
7634 return iemNativeVarRegisterAcquireSlowInt<true, false>(pReNative, idxVar, poff, UINT8_MAX);
7635}
7636
7637
7638/** See iemNativeVarRegisterAcquireSlowInt for details. */
7639DECL_HIDDEN_THROW(uint8_t)
7640iemNativeVarRegisterAcquireWithPrefSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff, uint8_t idxRegPref)
7641{
7642 /* unused */
7643 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 4]);
7644 return iemNativeVarRegisterAcquireSlowInt<false, true>(pReNative, idxVar, poff, idxRegPref);
7645}
7646
7647
7648/** See iemNativeVarRegisterAcquireSlowInt for details. */
7649DECL_HIDDEN_THROW(uint8_t)
7650iemNativeVarRegisterAcquireInitedWithPrefSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff, uint8_t idxRegPref)
7651{
7652 /* very very likely */
7653 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 6]);
7654 return iemNativeVarRegisterAcquireSlowInt<true, true>(pReNative, idxVar, poff, idxRegPref);
7655}
7656
7657
7658/**
7659 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7660 * fixed till we call iemNativeVarRegisterRelease.
7661 *
7662 * @returns The host register number.
7663 * @param pReNative The recompiler state.
7664 * @param idxVar The variable.
7665 * @param poff Pointer to the instruction buffer offset.
7666 * In case a register needs to be freed up or the value
7667 * loaded off the stack.
7668 * @param fInitialized Set if the variable must already have been initialized.
7669 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7670 * the case.
7671 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7672 */
7673/** @todo Create variants for the last two params like we've done for the
7674 * GPR variant? */
7675DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7676 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7677{
7678 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7679 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7680 Assert( pVar->cbVar == sizeof(RTUINT128U)
7681 || pVar->cbVar == sizeof(RTUINT256U));
7682 Assert(!pVar->fRegAcquired);
7683
7684/** @todo inline this bit? */
7685 uint8_t idxReg = pVar->idxReg;
7686 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7687 {
7688 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7689 && pVar->enmKind < kIemNativeVarKind_End);
7690 pVar->fRegAcquired = true;
7691 return idxReg;
7692 }
7693
7694 /*
7695 * If the kind of variable has not yet been set, default to 'stack'.
7696 */
7697 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7698 && pVar->enmKind < kIemNativeVarKind_End);
7699 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7700 iemNativeVarSetKindToStack(pReNative, idxVar);
7701
7702 /*
7703     * We have to allocate a register for the variable, even if it's a stack one,
7704     * as we don't know if there are modifications being made to it before it's
7705     * finalized (todo: analyze and insert hints about that?).
7706     *
7707     * If we can, we try to get the correct register for argument variables. This
7708     * assumes that most argument variables are fetched as close as possible to
7709     * the actual call, so that there aren't any interfering hidden calls
7710     * (memory accesses, etc) in between.
7711     *
7712     * If we cannot, or it's a regular (non-argument) variable, we make sure no
7713     * argument registers that will be used by this MC block are allocated here,
7714     * and we always prefer non-volatile registers to avoid needing to spill
7715     * stuff for internal calls.
7716 */
7717 /** @todo Detect too early argument value fetches and warn about hidden
7718 * calls causing less optimal code to be generated in the python script. */
7719
7720 uint8_t const uArgNo = pVar->uArgNo;
7721 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7722
7723    /* SIMD is a bit simpler for now because there is no support for arguments. */
7724 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7725 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7726 {
7727 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7728 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7729 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7730 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7731 & fNotArgsMask;
7732 if (fRegs)
7733 {
7734 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7735 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7736 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7737 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7738 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7739 }
7740 else
7741 {
7742 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7743 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7744 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7745 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7746 }
7747 }
7748 else
7749 {
7750 idxReg = idxRegPref;
7751 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7752 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7753 }
7754 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7755 pVar->idxReg = idxReg;
7756 pVar->fSimdReg = true;
7757
7758 /*
7759 * Load it off the stack if we've got a stack slot.
7760 */
7761 uint8_t const idxStackSlot = pVar->idxStackSlot;
7762 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7763 {
7764 Assert(fInitialized);
7765 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7766 switch (pVar->cbVar)
7767 {
7768 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7769 default: AssertFailed(); RT_FALL_THRU();
7770 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7771 }
7772 }
7773 else
7774 {
7775 Assert(idxStackSlot == UINT8_MAX);
7776 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7777 }
7778 pVar->fRegAcquired = true;
7779 return idxReg;
7780}
7781
7782
7783/**
7784 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7785 * guest register.
7786 *
7787 * This function makes sure there is a register for it and sets it to be the
7788 * current shadow copy of @a enmGstReg.
7789 *
7790 * @returns The host register number.
7791 * @param pReNative The recompiler state.
7792 * @param idxVar The variable.
7793 * @param enmGstReg The guest register this variable will be written to
7794 * after this call.
7795 * @param poff Pointer to the instruction buffer offset.
7796 * In case a register needs to be freed up or if the
7797 * variable content needs to be loaded off the stack.
7798 *
7799 * @note We DO NOT expect @a idxVar to be an argument variable, because
7800 *       this function can only be used in the commit stage of an
7801 *       instruction.
7802 */
7803DECL_HIDDEN_THROW(uint8_t)
7804iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7805{
7806 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7807 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7808 Assert(!pVar->fRegAcquired);
7809 AssertMsgStmt( pVar->cbVar <= 8
7810 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7811 || pVar->enmKind == kIemNativeVarKind_Stack),
7812 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7813 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7814 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7815
7816 /*
7817 * This shouldn't ever be used for arguments, unless it's in a weird else
7818 * branch that doesn't do any calling and even then it's questionable.
7819 *
7820 * However, in case someone writes crazy wrong MC code and does register
7821 * updates before making calls, just use the regular register allocator to
7822 * ensure we get a register suitable for the intended argument number.
7823 */
7824 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7825
7826 /*
7827 * If there is already a register for the variable, we transfer/set the
7828 * guest shadow copy assignment to it.
7829 */
7830 uint8_t idxReg = pVar->idxReg;
7831 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7832 {
7833#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7834 AssertCompile(kIemNativeGstReg_GprFirst == 0);
7835 if (enmGstReg <= kIemNativeGstReg_GprLast)
7836 {
7837# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7838 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7839 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7840# endif
7841 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7842 }
7843#endif
7844
7845 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7846 {
7847 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7848 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7849 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7850 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7851 }
7852 else
7853 {
7854 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7855 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7856 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7857 }
7858 pVar->fRegAcquired = true;
7859 return idxReg;
7860 }
7861 Assert(pVar->uArgNo == UINT8_MAX);
7862
7863 /*
7864     * Because this is supposed to be the commit stage, we just tag along with the
7865     * temporary register allocator and upgrade the register to a variable register.
7866 */
7867 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7868 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7869 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7870 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7871 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7872 pVar->idxReg = idxReg;
7873
7874 /*
7875 * Now we need to load the register value.
7876 */
7877 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7878 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7879 else
7880 {
7881 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7882 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7883 switch (pVar->cbVar)
7884 {
7885 case sizeof(uint64_t):
7886 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7887 break;
7888 case sizeof(uint32_t):
7889 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7890 break;
7891 case sizeof(uint16_t):
7892 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7893 break;
7894 case sizeof(uint8_t):
7895 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7896 break;
7897 default:
7898 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7899 }
7900 }
7901
7902 pVar->fRegAcquired = true;
7903 return idxReg;
7904}
7905
7906
7907/**
7908 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7909 *
7910 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7911 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7912 * requirement of flushing anything in volatile host registers when making a
7913 * call.
7914 *
7915 * @returns New @a off value.
7916 * @param pReNative The recompiler state.
7917 * @param off The code buffer position.
7918 * @param fHstGprNotToSave Set of GPRs not to save & restore.
7919 */
7920DECL_HIDDEN_THROW(uint32_t)
7921iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstGprNotToSave)
7922{
7923 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstGprNotToSave;
7924 if (fHstRegs)
7925 {
7926 do
7927 {
7928 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7929 fHstRegs &= ~RT_BIT_32(idxHstReg);
7930
7931 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7932 {
7933 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7934 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7935 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7936 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7937 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7938 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7939 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7940 {
7941 case kIemNativeVarKind_Stack:
7942 {
7943 /* Temporarily spill the variable register. */
7944 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7945 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7946 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7947 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7948 continue;
7949 }
7950
7951 case kIemNativeVarKind_Immediate:
7952 case kIemNativeVarKind_VarRef:
7953 case kIemNativeVarKind_GstRegRef:
7954 /* It is weird to have any of these loaded at this point. */
7955 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7956 continue;
7957
7958 case kIemNativeVarKind_End:
7959 case kIemNativeVarKind_Invalid:
7960 break;
7961 }
7962 AssertFailed();
7963 }
7964 else
7965 {
7966 /*
7967 * Allocate a temporary stack slot and spill the register to it.
7968 */
7969 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7970 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7971 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7972 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7973 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7974 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7975 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7976 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7977 }
7978 } while (fHstRegs);
7979 }
7980
7981 /*
7982     * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
7983     * which would be more difficult anyway due to them spanning multiple stack slots and having different sizes
7984     * (besides, we only have a limited number of slots at the moment).
7985     *
7986     * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted by
7987 * the callee. This asserts that the registers were written back earlier and are not in the dirty state.
7988 */
7989 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7990
7991 fHstRegs = pReNative->Core.bmHstSimdRegs & (IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
7992 if (fHstRegs)
7993 {
7994 do
7995 {
7996 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7997 fHstRegs &= ~RT_BIT_32(idxHstReg);
7998
7999 /* Fixed reserved and temporary registers don't need saving. */
8000 /*Assert( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedReserved
8001 && pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedTmp); included below */
8002 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8003
8004 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8005 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8006 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8007 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8008 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8009 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8010 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8011 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8012 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8013 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8014 {
8015 case kIemNativeVarKind_Stack:
8016 {
8017 /* Temporarily spill the variable register. */
8018 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8019 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8020 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8021 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8022 if (cbVar == sizeof(RTUINT128U))
8023 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8024 else
8025 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8026 continue;
8027 }
8028
8029 case kIemNativeVarKind_Immediate:
8030 case kIemNativeVarKind_VarRef:
8031 case kIemNativeVarKind_GstRegRef:
8032 /* It is weird to have any of these loaded at this point. */
8033 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8034 continue;
8035
8036 case kIemNativeVarKind_End:
8037 case kIemNativeVarKind_Invalid:
8038 break;
8039 }
8040 AssertFailed();
8041 } while (fHstRegs);
8042 }
8043 return off;
8044}
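
/*
 * Usage sketch (illustrative, simplified): a TLB-miss helper call is typically
 * bracketed like this, with fHstGprNotToSave covering any registers that already
 * hold the helper arguments:
 *
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstGprNotToSave);
 *      ... load the argument registers and emit the call to the helper ...
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstGprNotToSave);
 */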
8045
8046
8047/**
8048 * Emit code to restore volatile registers after to a call to a helper.
8049 *
8050 * @returns New @a off value.
8051 * @param pReNative The recompiler state.
8052 * @param off The code buffer position.
8053 * @param fHstGprNotToSave Set of registers not to save & restore.
8054 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8055 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8056 */
8057DECL_HIDDEN_THROW(uint32_t)
8058iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstGprNotToSave)
8059{
8060 /*
8061 * GPRs
8062 */
8063 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstGprNotToSave;
8064 if (fHstRegs)
8065 {
8066 do
8067 {
8068 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8069 fHstRegs &= ~RT_BIT_32(idxHstReg);
8070
8071 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8072 {
8073 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8074 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8075 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8076 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8077 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8078 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8079 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8080 {
8081 case kIemNativeVarKind_Stack:
8082 {
8083 /* Unspill the variable register. */
8084 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8085 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8086 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8087 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8088 continue;
8089 }
8090
8091 case kIemNativeVarKind_Immediate:
8092 case kIemNativeVarKind_VarRef:
8093 case kIemNativeVarKind_GstRegRef:
8094 /* It is weird to have any of these loaded at this point. */
8095 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8096 continue;
8097
8098 case kIemNativeVarKind_End:
8099 case kIemNativeVarKind_Invalid:
8100 break;
8101 }
8102 AssertFailed();
8103 }
8104 else
8105 {
8106 /*
8107 * Restore from temporary stack slot.
8108 */
8109 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8110 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8111 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8112 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8113
8114 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8115 }
8116 } while (fHstRegs);
8117 }
8118
8119 /*
8120 * SIMD registers.
8121 */
8122 fHstRegs = pReNative->Core.bmHstSimdRegs & (IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
8123 if (fHstRegs)
8124 {
8125 do
8126 {
8127 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8128 fHstRegs &= ~RT_BIT_32(idxHstReg);
8129
8130 /*Assert( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedTmp
8131 && pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedReserved); - included below. */
8132 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8133
8134 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8135 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8136 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8137 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8138 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8139 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8140 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8141 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8142 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8143 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8144 {
8145 case kIemNativeVarKind_Stack:
8146 {
8147 /* Unspill the variable register. */
8148 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8149 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8150 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8151 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8152
8153 if (cbVar == sizeof(RTUINT128U))
8154 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8155 else
8156 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8157 continue;
8158 }
8159
8160 case kIemNativeVarKind_Immediate:
8161 case kIemNativeVarKind_VarRef:
8162 case kIemNativeVarKind_GstRegRef:
8163 /* It is weird to have any of these loaded at this point. */
8164 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8165 continue;
8166
8167 case kIemNativeVarKind_End:
8168 case kIemNativeVarKind_Invalid:
8169 break;
8170 }
8171 AssertFailed();
8172 } while (fHstRegs);
8173 }
8174 return off;
8175}
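
/*
 * Usage sketch (illustration only, not part of the build): the restore helper
 * above is meant to be paired with a matching pre-call spill step around a
 * helper call.  The save helper and call emitter names below are assumptions
 * made for the sake of the example, not a definitive calling sequence.
 */
#if 0
static uint32_t iemNativeExampleEmitHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnHlp)
{
    /* Spill live variable values held in call-volatile registers (assumed helper name). */
    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, 0 /*fHstGprNotToSave*/);

    /* Emit the actual helper call (assumed emitter name). */
    off = iemNativeEmitCallImm(pReNative, off, pfnHlp);

    /* Reload the spilled values so the variables are live in registers again. */
    return iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, 0 /*fHstGprNotToSave*/);
}
#endif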
8176
8177
8178/**
8179 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8180 *
8181 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8182 *
8183 * ASSUMES that @a idxVar is valid and unpacked.
8184 */
8185DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8186{
8187 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8188 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8189 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8190 {
8191 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8192 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8193 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8194 Assert(cSlots > 0);
8195 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8196 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8197 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8198 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8199 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8200 }
8201 else
8202 Assert(idxStackSlot == UINT8_MAX);
8203}
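
/*
 * Worked example (illustration only, not part of the build): for a 32 byte
 * variable (RTUINT256U) that was assigned stack slot 4, the worker above
 * computes cSlots = 4 and fAllocMask = 0xf and thus clears bits 4 thru 7 of
 * Core.bmStack.
 */
#if 0
static void iemNativeExampleSlotMaskCalc(void)
{
    uint8_t const  cbVar        = 32;                                                /* sizeof(RTUINT256U) */
    uint8_t const  idxStackSlot = 4;
    uint8_t const  cSlots       = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t); /* = 4 */
    uint32_t const fAllocMask   = (uint32_t)(RT_BIT_32(cSlots) - 1U);                /* = 0xf */
    uint32_t       bmStack      = UINT32_C(0x000000f0);                              /* slots 4..7 allocated */
    bmStack &= ~(fAllocMask << idxStackSlot);                                        /* frees slots 4..7 */
    Assert(bmStack == 0);
}
#endif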
8204
8205
8206/**
8207 * Worker that frees a single variable.
8208 *
8209 * ASSUMES that @a idxVar is valid and unpacked.
8210 */
8211DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8212{
8213 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8214 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8215 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8216
8217 /* Free the host register first if any assigned. */
8218 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8219 if (idxHstReg != UINT8_MAX)
8220 {
8221 if (!pReNative->Core.aVars[idxVar].fSimdReg)
8222 {
8223 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
8224 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8225 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8226 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8227 }
8228 else
8229 {
8230 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8231 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8232 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8233 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8234 }
8235 }
8236
8237 /* Free argument mapping. */
8238 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8239 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8240 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8241
8242 /* Free the stack slots. */
8243 iemNativeVarFreeStackSlots(pReNative, idxVar);
8244
8245 /* Free the actual variable. */
8246 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8247 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8248}
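
/*
 * Illustration (not part of the build): callers generally hold packed variable
 * indexes (cf. IEMNATIVE_ASSERT_VAR_IDX above) and must unpack them before
 * handing them to this worker.  idxVarPacked below is a hypothetical local.
 */
#if 0
    iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVarPacked));
#endif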
8249
8250
8251/**
8252 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8253 */
8254DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8255{
8256 while (bmVars != 0)
8257 {
8258 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8259 bmVars &= ~RT_BIT_32(idxVar);
8260
8261#if 1 /** @todo optimize by simplifying this later... */
8262 iemNativeVarFreeOneWorker(pReNative, idxVar);
8263#else
8264 /* Only need to free the host register, the rest is done as bulk updates below. */
8265 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8266 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8267 {
8268 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8269 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8270 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8271 }
8272#endif
8273 }
8274#if 0 /** @todo optimize by simplifying this later... */
8275 pReNative->Core.bmVars = 0;
8276 pReNative->Core.bmStack = 0;
8277 pReNative->Core.u64ArgVars = UINT64_MAX;
8278#endif
8279}
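
/*
 * Illustration (not part of the build): the bitmap walking idiom used above and
 * in several other places in this file.  ASMBitFirstSetU32 returns a 1-based
 * bit index (zero when no bit is set), hence the minus one.
 */
#if 0
static void iemNativeExampleWalkVarBitmap(uint32_t bmVars)
{
    while (bmVars != 0)
    {
        uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1; /* lowest set bit */
        bmVars &= ~RT_BIT_32(idxVar);                         /* clear it and keep going */
        /* ... process idxVar ... */
    }
}
#endif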
8280
8281
8282
8283/*********************************************************************************************************************************
8284* Emitters for IEM_MC_CALL_CIMPL_XXX *
8285*********************************************************************************************************************************/
8286
8287/**
8288 * Emits code to load a reference to the given guest register into @a idxGprDst.
8289 */
8290DECL_HIDDEN_THROW(uint32_t)
8291iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8292 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8293{
8294#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8295 /** @todo If we are ever going to allow referencing the RIP register we need to update the guest value here. */
8296#endif
8297
8298 /*
8299 * Get the offset relative to the CPUMCTX structure.
8300 */
8301 uint32_t offCpumCtx;
8302 switch (enmClass)
8303 {
8304 case kIemNativeGstRegRef_Gpr:
8305 Assert(idxRegInClass < 16);
8306 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8307 break;
8308
8309 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
8310 Assert(idxRegInClass < 4);
8311 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8312 break;
8313
8314 case kIemNativeGstRegRef_EFlags:
8315 Assert(idxRegInClass == 0);
8316 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8317 break;
8318
8319 case kIemNativeGstRegRef_MxCsr:
8320 Assert(idxRegInClass == 0);
8321 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8322 break;
8323
8324 case kIemNativeGstRegRef_FpuReg:
8325 Assert(idxRegInClass < 8);
8326 AssertFailed(); /** @todo what kind of indexing? */
8327 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8328 break;
8329
8330 case kIemNativeGstRegRef_MReg:
8331 Assert(idxRegInClass < 8);
8332 AssertFailed(); /** @todo what kind of indexing? */
8333 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8334 break;
8335
8336 case kIemNativeGstRegRef_XReg:
8337 Assert(idxRegInClass < 16);
8338 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8339 break;
8340
8341 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8342 Assert(idxRegInClass == 0);
8343 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8344 break;
8345
8346 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8347 Assert(idxRegInClass == 0);
8348 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8349 break;
8350
8351 default:
8352 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8353 }
8354
8355 /*
8356 * Load the value into the destination register.
8357 */
8358#ifdef RT_ARCH_AMD64
8359 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8360
8361#elif defined(RT_ARCH_ARM64)
8362 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8363 Assert(offCpumCtx < 4096);
8364 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8365
8366#else
8367# error "Port me!"
8368#endif
8369
8370 return off;
8371}
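
/*
 * Illustration (not part of the build): conceptually the emitted code simply
 * materializes the host address of the referenced guest register, i.e. the
 * equivalent of the C expression below (a lea off the fixed pVCpu register on
 * AMD64, an add off the fixed pCpumCtx register x27 on ARM64, see above).
 */
#if 0
static uintptr_t iemNativeExampleGstRegRefAddr(PVMCPU pVCpu, uint32_t offCpumCtx)
{
    return (uintptr_t)&pVCpu->cpum.GstCtx + offCpumCtx;
}
#endif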
8372
8373
8374/**
8375 * Common code for CIMPL and AIMPL calls.
8376 *
8377 * These are calls that use argument variables and such. They should not be
8378 * confused with internal calls required to implement an MC operation,
8379 * such as a TLB load or similar.
8380 *
8381 * Upon return all that is left to do is to load any hidden arguments and
8382 * perform the call. All argument variables are freed.
8383 *
8384 * @returns New code buffer offset; throws VBox status code on error.
8385 * @param pReNative The native recompile state.
8386 * @param off The code buffer offset.
8387 * @param cArgs The total number of arguments (includes hidden
8388 * count).
8389 * @param cHiddenArgs The number of hidden arguments. The hidden
8390 * arguments must not have any variable declared for
8391 * them, whereas all the regular arguments must
8392 * (tstIEMCheckMc ensures this).
8393 * @param fFlushPendingWrites Whether to flush pending writes (default true);
8394 * pending writes in call volatile registers are still flushed even when false.
8395 */
8396DECL_HIDDEN_THROW(uint32_t)
8397iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8398 bool fFlushPendingWrites /*= true*/)
8399{
8400#ifdef VBOX_STRICT
8401 /*
8402 * Assert sanity.
8403 */
8404 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8405 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8406 for (unsigned i = 0; i < cHiddenArgs; i++)
8407 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8408 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8409 {
8410 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8411 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8412 }
8413 iemNativeRegAssertSanity(pReNative);
8414#endif
8415
8416 /* We don't know what the called function makes use of, so flush any pending register writes. */
8417 RT_NOREF(fFlushPendingWrites);
8418#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8419 if (fFlushPendingWrites)
8420#endif
8421 off = iemNativeRegFlushPendingWrites(pReNative, off);
8422
8423 /*
8424 * Before we do anything else, go over variables that are referenced and
8425 * make sure they are not in a register.
8426 */
8427 uint32_t bmVars = pReNative->Core.bmVars;
8428 if (bmVars)
8429 {
8430 do
8431 {
8432 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8433 bmVars &= ~RT_BIT_32(idxVar);
8434
8435 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8436 {
8437 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8438 if (idxRegOld != UINT8_MAX)
8439 {
8440 if (!pReNative->Core.aVars[idxVar].fSimdReg)
8441 {
8442 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
8443
8444 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8445 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8446 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8447 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8448 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8449
8450 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8451 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8452 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8453 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8454 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8455 }
8456 else
8457 {
8458 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8459 Assert( pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U)
8460 || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8461
8462 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8463 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8464 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8465 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8466 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8467 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off,
8468 iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8469 else
8470 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off,
8471 iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8472
8473 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8474 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8475
8476 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8477 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8478 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8479 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8480 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8481 }
8482 }
8483 }
8484 } while (bmVars != 0);
8485#if 0 //def VBOX_STRICT
8486 iemNativeRegAssertSanity(pReNative);
8487#endif
8488 }
8489
8490 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8491
8492#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8493 /*
8494 * As the very first step, go over the host registers that will be used for arguments
8495 * and make sure they don't shadow anything which needs writing back first.
8496 */
8497 for (uint32_t i = 0; i < cRegArgs; i++)
8498 {
8499 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8500
8501 /* Writeback any dirty guest shadows before using this register. */
8502 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8503 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8504 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8505 }
8506#endif
8507
8508 /*
8509 * First, go over the host registers that will be used for arguments and make
8510 * sure they either hold the desired argument or are free.
8511 */
8512 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8513 {
8514 for (uint32_t i = 0; i < cRegArgs; i++)
8515 {
8516 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8517 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8518 {
8519 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8520 {
8521 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8522 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8523 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8524 Assert(pVar->idxReg == idxArgReg);
8525 uint8_t const uArgNo = pVar->uArgNo;
8526 if (uArgNo == i)
8527 { /* perfect */ }
8528 /* The variable allocator logic should make sure this is impossible,
8529 except for when the return register is used as a parameter (ARM,
8530 but not x86). */
8531#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8532 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8533 {
8534# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8535# error "Implement this"
8536# endif
8537 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8538 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8539 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8540 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8541 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8542 }
8543#endif
8544 else
8545 {
8546 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8547
8548 if (pVar->enmKind == kIemNativeVarKind_Stack)
8549 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8550 else
8551 {
8552 /* just free it, can be reloaded if used again */
8553 pVar->idxReg = UINT8_MAX;
8554 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8555 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8556 }
8557 }
8558 }
8559 else
8560 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8561 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8562 }
8563 }
8564#if 0 //def VBOX_STRICT
8565 iemNativeRegAssertSanity(pReNative);
8566#endif
8567 }
8568
8569 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8570
8571#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8572 /*
8573 * If there are any stack arguments, make sure they are in their place as well.
8574 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8575 * the caller) will be loading it later and it must be free (see the first loop).
8576 * the caller) be loading it later and it must be free (see first loop).
8577 */
8578 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8579 {
8580 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8581 {
8582 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8583 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8584 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8585 {
8586 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8587 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8588 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8589 pVar->idxReg = UINT8_MAX;
8590 }
8591 else
8592 {
8593 /* Use ARG0 as temp for stuff we need registers for. */
8594 switch (pVar->enmKind)
8595 {
8596 case kIemNativeVarKind_Stack:
8597 {
8598 uint8_t const idxStackSlot = pVar->idxStackSlot;
8599 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8600 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8601 iemNativeStackCalcBpDisp(idxStackSlot));
8602 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8603 continue;
8604 }
8605
8606 case kIemNativeVarKind_Immediate:
8607 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8608 continue;
8609
8610 case kIemNativeVarKind_VarRef:
8611 {
8612 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8613 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8614 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8615 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8616 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8617 if (idxRegOther != UINT8_MAX)
8618 {
8619 if (!pReNative->Core.aVars[idxOtherVar].fSimdReg)
8620 {
8621 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs));
8622 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8623 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8624 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8625 }
8626 else
8627 {
8628 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8629 if (pReNative->Core.aVars[idxOtherVar].cbVar == sizeof(RTUINT128U))
8630 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8631 else
8632 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8633 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8634 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8635 }
8636 }
8637 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8638 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8639 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8640 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8641 continue;
8642 }
8643
8644 case kIemNativeVarKind_GstRegRef:
8645 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8646 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8647 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8648 continue;
8649
8650 case kIemNativeVarKind_Invalid:
8651 case kIemNativeVarKind_End:
8652 break;
8653 }
8654 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8655 }
8656 }
8657# if 0 //def VBOX_STRICT
8658 iemNativeRegAssertSanity(pReNative);
8659# endif
8660 }
8661#else
8662 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8663#endif
8664
8665 /*
8666 * Make sure the argument variables are loaded into their respective registers.
8667 *
8668 * We can optimize this by ASSUMING that any register allocations are for
8669 * registers that have already been loaded and are ready. The previous step
8670 * saw to that.
8671 */
8672 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8673 {
8674 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8675 {
8676 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8677 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8678 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8679 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8680 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8681 else
8682 {
8683 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8684 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8685 {
8686 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8687 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8688 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8689 | RT_BIT_32(idxArgReg);
8690 pVar->idxReg = idxArgReg;
8691 }
8692 else
8693 {
8694 /* Use ARG0 as temp for stuff we need registers for. */
8695 switch (pVar->enmKind)
8696 {
8697 case kIemNativeVarKind_Stack:
8698 {
8699 uint8_t const idxStackSlot = pVar->idxStackSlot;
8700 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8701 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8702 continue;
8703 }
8704
8705 case kIemNativeVarKind_Immediate:
8706 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8707 continue;
8708
8709 case kIemNativeVarKind_VarRef:
8710 {
8711 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8712 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8713 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8714 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8715 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8716 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8717 if (idxRegOther != UINT8_MAX)
8718 {
8719 if (!pReNative->Core.aVars[idxOtherVar].fSimdReg)
8720 {
8721 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs));
8722 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8723 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8724 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8725 }
8726 else
8727 {
8728 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8729 if (pReNative->Core.aVars[idxOtherVar].cbVar == sizeof(RTUINT128U))
8730 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8731 else
8732 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8733 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8734 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8735 }
8736 }
8737 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8738 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8739 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8740 continue;
8741 }
8742
8743 case kIemNativeVarKind_GstRegRef:
8744 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8745 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8746 continue;
8747
8748 case kIemNativeVarKind_Invalid:
8749 case kIemNativeVarKind_End:
8750 break;
8751 }
8752 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8753 }
8754 }
8755 }
8756#if 0 //def VBOX_STRICT
8757 iemNativeRegAssertSanity(pReNative);
8758#endif
8759 }
8760#ifdef VBOX_STRICT
8761 else
8762 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8763 {
8764 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8765 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8766 }
8767#endif
8768
8769 /*
8770 * Free all argument variables (simplified).
8771 * Their lifetime always expires with the call they are for.
8772 */
8773 /** @todo Make the python script check that arguments aren't used after
8774 * IEM_MC_CALL_XXXX. */
8775 /** @todo There is a special case with IEM_MC_MEM_SEG_MAP_U16_RW and friends requiring
8776 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8777 * an argument value. There is also some FPU stuff. */
8778 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8779 {
8780 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8781 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8782
8783 /* no need to free registers: */
8784 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8785 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8786 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8787 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8788 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8789 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8790
8791 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8792 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8793 iemNativeVarFreeStackSlots(pReNative, idxVar);
8794 }
8795 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8796
8797 /*
8798 * Flush volatile registers as we make the call.
8799 */
8800 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8801
8802 return off;
8803}
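
/*
 * Usage sketch (illustration only, not part of the build): a CIMPL emitter
 * roughly does the following - call the common worker, load the hidden
 * argument(s) and emit the call.  This is a simplification; the real emitters
 * also pass further hidden arguments (e.g. the instruction length), and the
 * IEM_CIMPL_HIDDEN_ARGS / iemNativeEmitCallImm names are assumed here.
 */
#if 0
static uint32_t iemNativeExampleEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                              uint8_t cArgs, uintptr_t pfnCImpl)
{
    /* Place/spill/free the argument variables; hidden arguments have no variables. */
    off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);

    /* Load the hidden pVCpu argument and perform the call. */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    return iemNativeEmitCallImm(pReNative, off, pfnCImpl);
}
#endif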
8804
8805
8806
8807/*********************************************************************************************************************************
8808* TLB Lookup. *
8809*********************************************************************************************************************************/
8810
8811/**
8812 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8813 */
8814DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint64_t uSegAndSizeAndAccessAndDisp)
8815{
8816 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccessAndDisp);
8817 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccessAndDisp);
8818 uint32_t const fAccess = (uint32_t)uSegAndSizeAndAccessAndDisp >> 16;
8819 uint8_t const offDisp = RT_BYTE5(uSegAndSizeAndAccessAndDisp);
8820 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64+%#x LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, offDisp, cbMem, fAccess, uResult));
8821
8822 /* Do the lookup manually. */
8823 RTGCPTR const GCPtrFlat = (iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base) + offDisp;
8824 uint64_t const uTagNoRev = IEMTLB_CALC_TAG_NO_REV(pVCpu, GCPtrFlat);
8825 PCIEMTLBENTRY pTlbe = IEMTLB_TAG_TO_EVEN_ENTRY(&pVCpu->iem.s.DataTlb, uTagNoRev);
8826 if (RT_LIKELY( pTlbe->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevision)
8827 || (pTlbe = pTlbe + 1)->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevisionGlobal)))
8828 {
8829 /*
8830 * Check TLB page table level access flags.
8831 */
8832 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8833 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8834 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8835 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8836 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8837 | IEMTLBE_F_PG_UNASSIGNED
8838 | IEMTLBE_F_PT_NO_ACCESSED
8839 | fNoWriteNoDirty | fNoUser);
8840 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8841 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8842 {
8843 /*
8844 * Return the address.
8845 */
8846 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8847 if ((uintptr_t)pbAddr == uResult)
8848 return;
8849 RT_NOREF(cbMem);
8850 AssertFailed();
8851 }
8852 else
8853 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8854 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8855 }
8856 else
8857 AssertFailed();
8858 RT_BREAKPOINT();
8859}
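
/*
 * Illustration (not part of the build): how the packed uSegAndSizeAndAccessAndDisp
 * parameter decoded above would be assembled.  Only the low 16 bits of fAccess
 * are carried, matching the unpacking done by this helper.
 */
#if 0
static uint64_t iemNativeExamplePackTlbCheckArg(uint8_t iSegReg, uint8_t cbMem, uint32_t fAccess, uint8_t offDisp)
{
    return (uint64_t)iSegReg                                /* byte 0: segment register (UINT8_MAX = flat) */
         | ((uint64_t)cbMem                        <<  8)   /* byte 1: access size */
         | ((uint64_t)(fAccess & UINT32_C(0xffff)) << 16)   /* bytes 2+3: access flags */
         | ((uint64_t)offDisp                      << 32);  /* byte 4: displacement */
}
#endif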
8860
8861/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8862
8863
8864
8865/*********************************************************************************************************************************
8866* Recompiler Core. *
8867*********************************************************************************************************************************/
8868
8869/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8870static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8871{
8872 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8873 pDis->cbCachedInstr += cbMaxRead;
8874 RT_NOREF(cbMinRead);
8875 return VERR_NO_DATA;
8876}
8877
8878
8879DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8880{
8881 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8882 {
8883#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8884 ENTRY(fLocalForcedActions),
8885 ENTRY(iem.s.rcPassUp),
8886 ENTRY(iem.s.fExec),
8887 ENTRY(iem.s.pbInstrBuf),
8888 ENTRY(iem.s.uInstrBufPc),
8889 ENTRY(iem.s.GCPhysInstrBuf),
8890 ENTRY(iem.s.cbInstrBufTotal),
8891 ENTRY(iem.s.idxTbCurInstr),
8892 ENTRY(iem.s.fSkippingEFlags),
8893#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
8894 ENTRY(iem.s.uPcUpdatingDebug),
8895#endif
8896#ifdef VBOX_WITH_STATISTICS
8897 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8898 ENTRY(iem.s.StatNativeTlbHitsForStore),
8899 ENTRY(iem.s.StatNativeTlbHitsForStack),
8900 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8901 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8902 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8903 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8904 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8905#endif
8906 ENTRY(iem.s.DataTlb.uTlbRevision),
8907 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8908 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8909 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8910 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8911 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8912 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8913 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8914 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8915 ENTRY(iem.s.DataTlb.aEntries),
8916 ENTRY(iem.s.CodeTlb.uTlbRevision),
8917 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8918 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8919 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8920 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8921 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8922 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8923 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8924 ENTRY(iem.s.CodeTlb.aEntries),
8925 ENTRY(pVMR3),
8926 ENTRY(cpum.GstCtx.rax),
8927 ENTRY(cpum.GstCtx.ah),
8928 ENTRY(cpum.GstCtx.rcx),
8929 ENTRY(cpum.GstCtx.ch),
8930 ENTRY(cpum.GstCtx.rdx),
8931 ENTRY(cpum.GstCtx.dh),
8932 ENTRY(cpum.GstCtx.rbx),
8933 ENTRY(cpum.GstCtx.bh),
8934 ENTRY(cpum.GstCtx.rsp),
8935 ENTRY(cpum.GstCtx.rbp),
8936 ENTRY(cpum.GstCtx.rsi),
8937 ENTRY(cpum.GstCtx.rdi),
8938 ENTRY(cpum.GstCtx.r8),
8939 ENTRY(cpum.GstCtx.r9),
8940 ENTRY(cpum.GstCtx.r10),
8941 ENTRY(cpum.GstCtx.r11),
8942 ENTRY(cpum.GstCtx.r12),
8943 ENTRY(cpum.GstCtx.r13),
8944 ENTRY(cpum.GstCtx.r14),
8945 ENTRY(cpum.GstCtx.r15),
8946 ENTRY(cpum.GstCtx.es.Sel),
8947 ENTRY(cpum.GstCtx.es.u64Base),
8948 ENTRY(cpum.GstCtx.es.u32Limit),
8949 ENTRY(cpum.GstCtx.es.Attr),
8950 ENTRY(cpum.GstCtx.cs.Sel),
8951 ENTRY(cpum.GstCtx.cs.u64Base),
8952 ENTRY(cpum.GstCtx.cs.u32Limit),
8953 ENTRY(cpum.GstCtx.cs.Attr),
8954 ENTRY(cpum.GstCtx.ss.Sel),
8955 ENTRY(cpum.GstCtx.ss.u64Base),
8956 ENTRY(cpum.GstCtx.ss.u32Limit),
8957 ENTRY(cpum.GstCtx.ss.Attr),
8958 ENTRY(cpum.GstCtx.ds.Sel),
8959 ENTRY(cpum.GstCtx.ds.u64Base),
8960 ENTRY(cpum.GstCtx.ds.u32Limit),
8961 ENTRY(cpum.GstCtx.ds.Attr),
8962 ENTRY(cpum.GstCtx.fs.Sel),
8963 ENTRY(cpum.GstCtx.fs.u64Base),
8964 ENTRY(cpum.GstCtx.fs.u32Limit),
8965 ENTRY(cpum.GstCtx.fs.Attr),
8966 ENTRY(cpum.GstCtx.gs.Sel),
8967 ENTRY(cpum.GstCtx.gs.u64Base),
8968 ENTRY(cpum.GstCtx.gs.u32Limit),
8969 ENTRY(cpum.GstCtx.gs.Attr),
8970 ENTRY(cpum.GstCtx.rip),
8971 ENTRY(cpum.GstCtx.eflags),
8972 ENTRY(cpum.GstCtx.uRipInhibitInt),
8973 ENTRY(cpum.GstCtx.cr0),
8974 ENTRY(cpum.GstCtx.cr4),
8975 ENTRY(cpum.GstCtx.aXcr[0]),
8976 ENTRY(cpum.GstCtx.aXcr[1]),
8977 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8978 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8979 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8980 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8981 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8982 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8983 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8984 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8985 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8986 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8987 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8988 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8989 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8990 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8991 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8992 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8993 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8994 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8995 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8996 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8997 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8998 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8999 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
9000 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
9001 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
9002 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
9003 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
9004 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
9005 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
9006 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
9007 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
9008 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
9009 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
9010#undef ENTRY
9011 };
9012#ifdef VBOX_STRICT
9013 static bool s_fOrderChecked = false;
9014 if (!s_fOrderChecked)
9015 {
9016 s_fOrderChecked = true;
9017 uint32_t offPrev = s_aMembers[0].off;
9018 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
9019 {
9020 Assert(s_aMembers[i].off > offPrev);
9021 offPrev = s_aMembers[i].off;
9022 }
9023 }
9024#endif
9025
9026 /*
9027 * Binary lookup.
9028 */
9029 unsigned iStart = 0;
9030 unsigned iEnd = RT_ELEMENTS(s_aMembers);
9031 for (;;)
9032 {
9033 unsigned const iCur = iStart + (iEnd - iStart) / 2;
9034 uint32_t const offCur = s_aMembers[iCur].off;
9035 if (off < offCur)
9036 {
9037 if (iCur != iStart)
9038 iEnd = iCur;
9039 else
9040 break;
9041 }
9042 else if (off > offCur)
9043 {
9044 if (iCur + 1 < iEnd)
9045 iStart = iCur + 1;
9046 else
9047 break;
9048 }
9049 else
9050 return s_aMembers[iCur].pszName;
9051 }
9052#ifdef VBOX_WITH_STATISTICS
9053 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9054 return "iem.s.acThreadedFuncStats[iFn]";
9055#endif
9056 return NULL;
9057}
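
/*
 * Illustration (not part of the build): example lookups against the sorted
 * member table above; offsets that do not match an entry exactly yield NULL
 * (except for the threaded function statistics range).
 */
#if 0
static void iemNativeExampleOffsetToName(void)
{
    Assert(!strcmp(iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip)), "cpum.GstCtx.rip"));
    Assert(iemNativeDbgVCpuOffsetToName(UINT32_MAX) == NULL);
}
#endif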
9058
9059
9060/**
9061 * Translates a label to a name.
9062 */
9063static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode /*= false*/)
9064{
9065 switch (enmLabel)
9066 {
9067#define STR_CASE_CMN(a_Label) case kIemNativeLabelType_ ## a_Label: return fCommonCode ? "Chunk_" #a_Label : #a_Label;
9068 STR_CASE_CMN(Invalid);
9069 STR_CASE_CMN(RaiseDe);
9070 STR_CASE_CMN(RaiseUd);
9071 STR_CASE_CMN(RaiseSseRelated);
9072 STR_CASE_CMN(RaiseAvxRelated);
9073 STR_CASE_CMN(RaiseSseAvxFpRelated);
9074 STR_CASE_CMN(RaiseNm);
9075 STR_CASE_CMN(RaiseGp0);
9076 STR_CASE_CMN(RaiseMf);
9077 STR_CASE_CMN(RaiseXf);
9078 STR_CASE_CMN(ObsoleteTb);
9079 STR_CASE_CMN(NeedCsLimChecking);
9080 STR_CASE_CMN(CheckBranchMiss);
9081 STR_CASE_CMN(ReturnSuccess);
9082 STR_CASE_CMN(ReturnBreak);
9083 STR_CASE_CMN(ReturnBreakFF);
9084 STR_CASE_CMN(ReturnWithFlags);
9085 STR_CASE_CMN(ReturnBreakViaLookup);
9086 STR_CASE_CMN(ReturnBreakViaLookupWithIrq);
9087 STR_CASE_CMN(ReturnBreakViaLookupWithTlb);
9088 STR_CASE_CMN(ReturnBreakViaLookupWithTlbAndIrq);
9089 STR_CASE_CMN(NonZeroRetOrPassUp);
9090#undef STR_CASE_CMN
9091#define STR_CASE_LBL(a_Label) case kIemNativeLabelType_ ## a_Label: return #a_Label;
9092 STR_CASE_LBL(LoopJumpTarget);
9093 STR_CASE_LBL(If);
9094 STR_CASE_LBL(Else);
9095 STR_CASE_LBL(Endif);
9096 STR_CASE_LBL(CheckIrq);
9097 STR_CASE_LBL(TlbLookup);
9098 STR_CASE_LBL(TlbMiss);
9099 STR_CASE_LBL(TlbDone);
9100 case kIemNativeLabelType_End: break;
9101 }
9102 return NULL;
9103}
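
/*
 * Illustration (not part of the build): the fCommonCode flag only affects the
 * common (tail/exit) labels, prefixing them with "Chunk_"; labels that can have
 * multiple instances ignore it.
 */
#if 0
static void iemNativeExampleLabelNames(void)
{
    Assert(!strcmp(iemNativeGetLabelName(kIemNativeLabelType_ReturnBreak, false /*fCommonCode*/), "ReturnBreak"));
    Assert(!strcmp(iemNativeGetLabelName(kIemNativeLabelType_ReturnBreak, true /*fCommonCode*/),  "Chunk_ReturnBreak"));
    Assert(!strcmp(iemNativeGetLabelName(kIemNativeLabelType_TlbMiss,     true /*fCommonCode*/),  "TlbMiss"));
}
#endif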
9104
9105
9106/** Info for the symbols resolver used when disassembling. */
9107typedef struct IEMNATIVDISASMSYMCTX
9108{
9109 PVMCPU pVCpu;
9110 PCIEMTB pTb;
9111 PCIEMNATIVEPERCHUNKCTX pCtx;
9112#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9113 PCIEMTBDBG pDbgInfo;
9114#endif
9115} IEMNATIVDISASMSYMCTX;
9116typedef IEMNATIVDISASMSYMCTX *PIEMNATIVDISASMSYMCTX;
9117
9118
9119/**
9120 * Resolve address to symbol, if we can.
9121 */
9122static const char *iemNativeDisasmGetSymbol(PIEMNATIVDISASMSYMCTX pSymCtx, uintptr_t uAddress, char *pszBuf, size_t cbBuf)
9123{
9124 PCIEMTB const pTb = pSymCtx->pTb;
9125 uintptr_t const offNative = (uAddress - (uintptr_t)pTb->Native.paInstructions) / sizeof(IEMNATIVEINSTR);
9126 if (offNative <= pTb->Native.cInstructions)
9127 {
9128#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9129 /*
9130 * Scan debug info for a matching label.
9131 * Since the debug info should be 100% linear, we can do a binary search here.
9132 */
9133 PCIEMTBDBG const pDbgInfo = pSymCtx->pDbgInfo;
9134 if (pDbgInfo)
9135 {
9136 uint32_t const cEntries = pDbgInfo->cEntries;
9137 uint32_t idxEnd = cEntries;
9138 uint32_t idxStart = 0;
9139 for (;;)
9140 {
9141 /* Find a NativeOffset record close to the midpoint. */
9142 uint32_t idx = idxStart + (idxEnd - idxStart) / 2;
9143 while (idx > idxStart && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9144 idx--;
9145 if (pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9146 {
9147 idx = idxStart + (idxEnd - idxStart) / 2 + 1;
9148 while (idx < idxEnd && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9149 idx++;
9150 if (idx >= idxEnd)
9151 break;
9152 }
9153
9154 /* Do the binary searching thing. */
9155 if (offNative < pDbgInfo->aEntries[idx].NativeOffset.offNative)
9156 {
9157 if (idx > idxStart)
9158 idxEnd = idx;
9159 else
9160 break;
9161 }
9162 else if (offNative > pDbgInfo->aEntries[idx].NativeOffset.offNative)
9163 {
9164 idx += 1;
9165 if (idx < idxEnd)
9166 idxStart = idx;
9167 else
9168 break;
9169 }
9170 else
9171 {
9172 /* Got a matching offset, scan forward till we hit a label, but
9173 stop when the native offset changes. */
9174 while (++idx < cEntries)
9175 switch (pDbgInfo->aEntries[idx].Gen.uType)
9176 {
9177 case kIemTbDbgEntryType_Label:
9178 {
9179 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)pDbgInfo->aEntries[idx].Label.enmLabel;
9180 const char * const pszName = iemNativeGetLabelName(enmLabel);
9181 if (enmLabel < kIemNativeLabelType_FirstWithMultipleInstances)
9182 return pszName;
9183 RTStrPrintf(pszBuf, cbBuf, "%s_%u", pszName, pDbgInfo->aEntries[idx].Label.uData);
9184 return pszBuf;
9185 }
9186
9187 case kIemTbDbgEntryType_NativeOffset:
9188 if (pDbgInfo->aEntries[idx].NativeOffset.offNative != offNative)
9189 return NULL;
9190 break;
9191 }
9192 break;
9193 }
9194 }
9195 }
9196#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9197 }
9198 else
9199 {
9200 PCIEMNATIVEPERCHUNKCTX const pChunkCtx = pSymCtx->pCtx;
9201 if (pChunkCtx)
9202 for (uint32_t i = 1; i < RT_ELEMENTS(pChunkCtx->apExitLabels); i++)
9203 if ((PIEMNATIVEINSTR)uAddress == pChunkCtx->apExitLabels[i])
9204 return iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true /*fCommonCode*/);
9205 }
9206 RT_NOREF(pszBuf, cbBuf);
9207 return NULL;
9208}
9209
9210#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9211
9212/**
9213 * @callback_method_impl{FNDISGETSYMBOL}
9214 */
9215static DECLCALLBACK(int) iemNativeDisasmGetSymbolCb(PCDISSTATE pDis, uint32_t u32Sel, RTUINTPTR uAddress,
9216 char *pszBuf, size_t cchBuf, RTINTPTR *poff, void *pvUser)
9217{
9218 const char * const pszSym = iemNativeDisasmGetSymbol((PIEMNATIVDISASMSYMCTX)pvUser, uAddress, pszBuf, cchBuf);
9219 if (pszSym)
9220 {
9221 *poff = 0;
9222 if (pszSym != pszBuf)
9223 return RTStrCopy(pszBuf, cchBuf, pszSym);
9224 return VINF_SUCCESS;
9225 }
9226 RT_NOREF(pDis, u32Sel);
9227 return VERR_SYMBOL_NOT_FOUND;
9228}
9229
9230
9231/**
9232 * Appends annotations to the disassembled instructions.
9233 */
9234static void
9235iemNativeDisasmAppendAnnotation(char *pszDisBuf, size_t cbDisBuf, PCDISSTATE pDis)
9236{
9237 const char *pszAnnotation = NULL;
9238# if defined(RT_ARCH_AMD64)
9239 if (pDis->pCurInstr->uOpcode == OP_NOP && pDis->cbInstr == 7) /* iemNativeEmitMarker */
9240 {
9241 static const char * const s_apszMarkers[] =
9242 {
9243 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9244 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9245 };
9246
9247 uint32_t const uInfo = *(uint32_t const *)&pDis->Instr.ab[3];
9248 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9249 RTStrPrintf(pszDisBuf, cbDisBuf, "nop ; marker: call #%u to %s (%u args) - %s\n",
9250 uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9251 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9252 uInfo & 0x8000 ? "recompiled" : "todo");
9253 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(s_apszMarkers))
9254 RTStrPrintf(pszDisBuf, cbDisBuf, "nop ; marker: %s\n", s_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9255 else
9256 RTStrPrintf(pszDisBuf, cbDisBuf, "nop ; unknown marker: %#x (%d)\n", uInfo, uInfo);
9257 return;
9258 }
9259
9260 PCDISOPPARAM pMemOp;
9261 if (DISUSE_IS_EFFECTIVE_ADDR(pDis->aParams[0].fUse))
9262 pMemOp = &pDis->aParams[0];
9263 else if (DISUSE_IS_EFFECTIVE_ADDR(pDis->aParams[1].fUse))
9264 pMemOp = &pDis->aParams[1];
9265 else if (DISUSE_IS_EFFECTIVE_ADDR(pDis->aParams[2].fUse))
9266 pMemOp = &pDis->aParams[2];
9267 else
9268 return;
9269 if ( pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9270 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9271 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9272 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9273 else
9274 return;
9275
9276# elif defined(RT_ARCH_ARM64)
9277 /* The memory operand is always number two on ARM. */
9278 if ( pDis->aParams[1].armv8.enmType == kDisArmv8OpParmAddrInGpr
9279 && !(pDis->aParams[1].fUse & (DISUSE_INDEX | DISUSE_PRE_INDEXED | DISUSE_POST_INDEXED))
9280 /** @todo DISUSE_REG_GEN64 is not set: && (pDis->aParams[1].fUse & DISUSE_REG_GEN64) */
9281 && pDis->aParams[1].armv8.Op.Reg.enmRegType == kDisOpParamArmV8RegType_Gpr_64Bit)
9282 {
9283 if (pDis->aParams[1].armv8.Op.Reg.idReg == IEMNATIVE_REG_FIXED_PVMCPU)
9284 pszAnnotation = iemNativeDbgVCpuOffsetToName(pDis->aParams[1].armv8.u.offBase);
9285 else if (pDis->aParams[1].armv8.Op.Reg.idReg == IEMNATIVE_REG_FIXED_PCPUMCTX)
9286 pszAnnotation = iemNativeDbgVCpuOffsetToName(pDis->aParams[1].armv8.u.offBase + RT_UOFFSETOF(VMCPU, cpum.GstCtx));
9287 }
9288 else
9289 return;
9290
9291# else
9292# error "Port me"
9293# endif
9294 if (pszAnnotation)
9295 {
9296 static unsigned const s_offAnnotation = 55;
9297 size_t const cchAnnotation = strlen(pszAnnotation);
9298 size_t cchDis = strlen(pszDisBuf);
9299 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= cbDisBuf)
9300 {
9301 if (cchDis < s_offAnnotation)
9302 {
9303 memset(&pszDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9304 cchDis = s_offAnnotation;
9305 }
9306 pszDisBuf[cchDis++] = ' ';
9307 pszDisBuf[cchDis++] = ';';
9308 pszDisBuf[cchDis++] = ' ';
9309 memcpy(&pszDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9310 }
9311 }
9312}
9313
9314#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9315
9316/**
9317 * Annotates an instruction decoded by the capstone disassembler.
9318 */
9319static const char *
9320iemNativeDisasmAnnotateCapstone(PIEMNATIVDISASMSYMCTX pSymCtx, cs_insn const *pInstr, char *pszBuf, size_t cchBuf)
9321{
9322# if defined(RT_ARCH_ARM64)
9323 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9324 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9325 {
9326 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9327 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9328 char const *psz = strchr(pInstr->op_str, '[');
9329 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9330 {
9331 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9332 int32_t off = -1;
9333 psz += 4;
9334 if (*psz == ']')
9335 off = 0;
9336 else if (*psz == ',')
9337 {
9338 psz = RTStrStripL(psz + 1);
9339 if (*psz == '#')
9340 off = RTStrToInt32(&psz[1]);
9341 /** @todo deal with index registers and LSL as well... */
9342 }
9343 if (off >= 0)
9344 return iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9345 }
9346 }
9347 else if (pInstr->id == ARM64_INS_B || pInstr->id == ARM64_INS_BL)
9348 {
9349 const char *pszAddr = strchr(pInstr->op_str, '#');
9350 if (pszAddr)
9351 {
9352 uint64_t uAddr = RTStrToUInt64(pszAddr + 1);
9353 if (uAddr != 0)
9354 return iemNativeDisasmGetSymbol(pSymCtx, uAddr, pszBuf, cchBuf);
9355 }
9356 }
9357# endif
9358 RT_NOREF(pSymCtx, pInstr, pszBuf, cchBuf);
9359 return NULL;
9360}
9361#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9362
9363
9364DECLHIDDEN(void) iemNativeDisassembleTb(PVMCPU pVCpu, PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9365{
9366 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9367 char szDisBuf[512];
9368 DISSTATE Dis;
9369 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9370 uint32_t const cNative = pTb->Native.cInstructions;
9371 uint32_t offNative = 0;
9372#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9373 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9374#endif
9375 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_X86_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9376 : (pTb->fFlags & IEM_F_MODE_X86_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9377 : DISCPUMODE_64BIT;
9378#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9379 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb), pDbgInfo };
9380#else
9381 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb) };
9382#endif
9383#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9384 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9385#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9386 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9387#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9388# error "Port me"
9389#else
9390 csh hDisasm = ~(size_t)0;
9391# if defined(RT_ARCH_AMD64)
9392 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9393# elif defined(RT_ARCH_ARM64)
9394 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9395# else
9396# error "Port me"
9397# endif
9398 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9399
9400 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9401 //Assert(rcCs == CS_ERR_OK);
9402#endif
9403
9404 /*
9405 * Print TB info.
9406 */
9407 pHlp->pfnPrintf(pHlp,
9408 "pTb=%p: GCPhysPc=%RGp (%%%RGv) cInstructions=%u LB %#x cRanges=%u\n"
9409 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9410 pTb, pTb->GCPhysPc,
9411#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9412 pTb->pDbgInfo ? pTb->pDbgInfo->FlatPc : RTGCPTR_MAX,
9413#else
9414 pTb->FlatPc,
9415#endif
9416 pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9417 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9418#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9419 if (pDbgInfo && pDbgInfo->cEntries > 1)
9420 {
9421 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9422
9423 /*
9424 * This disassembly is driven by the debug info which follows the native
9425 * code and indicates when it starts with the next guest instructions,
9426 * where labels are and such things.
9427 */
9428 uint32_t idxThreadedCall = 0;
9429 uint32_t idxGuestInstr = 0;
9430 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9431 uint8_t idxRange = UINT8_MAX;
9432 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9433 uint32_t offRange = 0;
9434 uint32_t offOpcodes = 0;
9435 uint32_t const cbOpcodes = pTb->cbOpcodes;
9436 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9437 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9438 uint32_t iDbgEntry = 1;
9439 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9440
9441 while (offNative < cNative)
9442 {
9443 /* If we're at or have passed the point where the next chunk of debug
9444 info starts, process it. */
9445 if (offDbgNativeNext <= offNative)
9446 {
9447 offDbgNativeNext = UINT32_MAX;
9448 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9449 {
9450 switch ((IEMTBDBGENTRYTYPE)pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9451 {
9452 case kIemTbDbgEntryType_GuestInstruction:
9453 {
9454 /* Did the exec flag change? */
9455 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9456 {
9457 pHlp->pfnPrintf(pHlp,
9458 " fExec change %#08x -> %#08x %s\n",
9459 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9460 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9461 szDisBuf, sizeof(szDisBuf)));
9462 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9463 enmGstCpuMode = (fExec & IEM_F_MODE_X86_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9464 : (fExec & IEM_F_MODE_X86_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9465 : DISCPUMODE_64BIT;
9466 }
9467
9468 /* New opcode range? We need to fend off a spurious debug info entry here for cases
9469 where the compilation was aborted before the opcode was recorded and the actual
9470 instruction was translated to a threaded call. This may happen when we run out
9471 of ranges, or when some complicated interrupts/FFs are found to be pending or
9472 similar. So, we just deal with it here rather than in the compiler code as it
9473 is a lot simpler to do here. */
9474 if ( idxRange == UINT8_MAX
9475 || idxRange >= cRanges
9476 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9477 {
9478 idxRange += 1;
9479 if (idxRange < cRanges)
9480 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9481 else
9482 continue;
9483 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9484 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9485 + (pTb->aRanges[idxRange].idxPhysPage == 0
9486 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9487 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9488 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9489 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9490 pTb->aRanges[idxRange].idxPhysPage);
9491 GCPhysPc += offRange;
9492 }
9493
9494 /* Disassemble the instruction. */
9495 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9496 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9497 uint32_t cbInstr = 1;
9498 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9499 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9500 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9501 if (RT_SUCCESS(rc))
9502 {
9503 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9504 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9505 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9506 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9507
9508 static unsigned const s_offMarker = 55;
9509 static char const s_szMarker[] = " ; <--- guest";
9510 if (cch < s_offMarker)
9511 {
9512 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9513 cch = s_offMarker;
9514 }
9515 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9516 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9517
9518 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %s #%u\n", GCPhysPc, szDisBuf, idxGuestInstr);
9519 }
9520 else
9521 {
9522 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9523 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9524 cbInstr = 1;
9525 }
9526 idxGuestInstr++;
9527 GCPhysPc += cbInstr;
9528 offOpcodes += cbInstr;
9529 offRange += cbInstr;
9530 continue;
9531 }
9532
9533 case kIemTbDbgEntryType_ThreadedCall:
9534 pHlp->pfnPrintf(pHlp,
9535 " Call #%u to %s (%u args) - %s\n",
9536 idxThreadedCall,
9537 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9538 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9539 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9540 idxThreadedCall++;
9541 continue;
9542
9543 case kIemTbDbgEntryType_GuestRegShadowing:
9544 {
9545 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9546 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9547 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9548 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9549 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9550 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9551 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9552 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9553 else
9554 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9555 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9556 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9557 continue;
9558 }
9559
9560 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9561 {
9562 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9563 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9564 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9565 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9566 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9567 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9568 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9569 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9570 else
9571 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9572 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9573 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9574 continue;
9575 }
9576
9577 case kIemTbDbgEntryType_Label:
9578 {
9579 const char *pszName = iemNativeGetLabelName((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel);
9580 if (pDbgInfo->aEntries[iDbgEntry].Label.enmLabel >= kIemNativeLabelType_FirstWithMultipleInstances)
9581 {
9582 const char *pszComment = pDbgInfo->aEntries[iDbgEntry].Label.enmLabel == kIemNativeLabelType_Else
9583 ? " ; regs state restored pre-if-block" : "";
9584 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9585 }
9586 else
9587 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9588 continue;
9589 }
9590
9591 case kIemTbDbgEntryType_NativeOffset:
9592 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9593 Assert(offDbgNativeNext >= offNative);
9594 break;
9595
9596# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9597 case kIemTbDbgEntryType_DelayedPcUpdate:
9598 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9599 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9600 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9601 continue;
9602# endif
9603
9604# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9605 case kIemTbDbgEntryType_GuestRegDirty:
9606 {
9607 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9608 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9609 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9610 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9611 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9612 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9613 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9614 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9615 pszGstReg, pszHstReg);
9616 continue;
9617 }
9618
9619 case kIemTbDbgEntryType_GuestRegWriteback:
9620                pHlp->pfnPrintf(pHlp, "  Writing dirty %s registers (gst %#RX64)\n",
9621 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9622 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9623 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9624 continue;
9625# endif
9626
9627# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9628 case kIemTbDbgEntryType_PostponedEFlagsCalc:
9629 {
9630 const char *pszOp = "!unknown!";
9631 switch ((IEMNATIVE_POSTPONED_EFL_OP_T)pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.enmOp)
9632 {
9633 case kIemNativePostponedEflOp_Logical: pszOp = "logical"; break;
9634 case kIemNativePostponedEflOp_Invalid: break;
9635 case kIemNativePostponedEflOp_End: break;
9636 }
9637 pHlp->pfnPrintf(pHlp, " Postponed EFLAGS calc #%u: %s %u bits\n",
9638 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.idxEmit, pszOp,
9639 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.cOpBits);
9640 continue;
9641 }
9642# endif
9643 default:
9644 AssertFailed();
9645 continue;
9646 }
9647 /* Break out of the loop at kIemTbDbgEntryType_NativeOffset. */
9648 iDbgEntry++;
9649 break;
9650 }
9651 }
9652
9653 /*
9654 * Disassemble the next native instruction.
9655 */
9656 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
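            /* Note: IEMNATIVEINSTR entries are bytes on AMD64 and 32-bit words on ARM64,
               which is why the byte sizes reported by the disassemblers are divided by
               sizeof(paNative[0]) / sizeof(*pNativeCur) before advancing offNative. */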
9657# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9658 uint32_t cbInstr = sizeof(paNative[0]);
9659 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9660 if (RT_SUCCESS(rc))
9661 {
9662# ifdef RT_ARCH_AMD64
9663 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9664 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9665 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9666 iemNativeDisasmGetSymbolCb, &SymCtx);
9667# elif defined(RT_ARCH_ARM64)
9668 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9669 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9670 iemNativeDisasmGetSymbolCb, &SymCtx);
9671# else
9672# error "Port me"
9673# endif
9674 iemNativeDisasmAppendAnnotation(szDisBuf, sizeof(szDisBuf), &Dis);
9675 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9676 }
9677 else
9678 {
9679# if defined(RT_ARCH_AMD64)
9680 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9681 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9682# elif defined(RT_ARCH_ARM64)
9683 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9684# else
9685# error "Port me"
9686# endif
9687 cbInstr = sizeof(paNative[0]);
9688 }
9689 offNative += cbInstr / sizeof(paNative[0]);
9690
9691# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9692 cs_insn *pInstr;
9693 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9694 (uintptr_t)pNativeCur, 1, &pInstr);
9695 if (cInstrs > 0)
9696 {
9697 Assert(cInstrs == 1);
9698 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9699 size_t const cchOp = strlen(pInstr->op_str);
9700# if defined(RT_ARCH_AMD64)
9701 if (pszAnnotation)
9702 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9703 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9704 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9705 else
9706 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9707 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9708
9709# else
9710 if (pszAnnotation)
9711 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9712 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9713 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9714 else
9715 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9716 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9717# endif
9718 offNative += pInstr->size / sizeof(*pNativeCur);
9719 cs_free(pInstr, cInstrs);
9720 }
9721 else
9722 {
9723# if defined(RT_ARCH_AMD64)
9724 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9725                                pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9726# else
9727 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9728# endif
9729 offNative++;
9730 }
9731# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9732 }
9733 }
9734 else
9735#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9736 {
9737 /*
9738 * No debug info, just disassemble the x86 code and then the native code.
9739 *
9740 * First the guest code:
9741 */
9742 for (unsigned i = 0; i < pTb->cRanges; i++)
9743 {
9744 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9745 + (pTb->aRanges[i].idxPhysPage == 0
9746 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9747 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9748 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9749 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9750 unsigned off = pTb->aRanges[i].offOpcodes;
9751 /** @todo this ain't working when crossing pages! */
9752 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9753 while (off < cbOpcodes)
9754 {
9755 uint32_t cbInstr = 1;
9756 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9757 &pTb->pabOpcodes[off], cbOpcodes - off,
9758 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9759 if (RT_SUCCESS(rc))
9760 {
9761 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9762 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9763 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9764 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9765 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9766 GCPhysPc += cbInstr;
9767 off += cbInstr;
9768 }
9769 else
9770 {
9771 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - disassembly failure %Rrc\n",
9772 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9773 break;
9774 }
9775 }
9776 }
9777
9778 /*
9779 * Then the native code:
9780 */
9781 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9782 while (offNative < cNative)
9783 {
9784 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9785#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9786 uint32_t cbInstr = sizeof(paNative[0]);
9787 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9788 if (RT_SUCCESS(rc))
9789 {
9790# ifdef RT_ARCH_AMD64
9791 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9792 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9793 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9794 iemNativeDisasmGetSymbolCb, &SymCtx);
9795# elif defined(RT_ARCH_ARM64)
9796 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9797 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9798 iemNativeDisasmGetSymbolCb, &SymCtx);
9799# else
9800# error "Port me"
9801# endif
9802 iemNativeDisasmAppendAnnotation(szDisBuf, sizeof(szDisBuf), &Dis);
9803 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9804 }
9805 else
9806 {
9807# if defined(RT_ARCH_AMD64)
9808 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9809 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9810# else
9811 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9812# endif
9813 cbInstr = sizeof(paNative[0]);
9814 }
9815 offNative += cbInstr / sizeof(paNative[0]);
9816
9817#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9818 cs_insn *pInstr;
9819 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9820 (uintptr_t)pNativeCur, 1, &pInstr);
9821 if (cInstrs > 0)
9822 {
9823 Assert(cInstrs == 1);
9824 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9825 size_t const cchOp = strlen(pInstr->op_str);
9826# if defined(RT_ARCH_AMD64)
9827 if (pszAnnotation)
9828 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9829 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9830 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9831 else
9832 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9833 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9834
9835# else
9836 if (pszAnnotation)
9837 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9838 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9839 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9840 else
9841 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9842 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9843# endif
9844 offNative += pInstr->size / sizeof(*pNativeCur);
9845 cs_free(pInstr, cInstrs);
9846 }
9847 else
9848 {
9849# if defined(RT_ARCH_AMD64)
9850 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9851                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9852# else
9853 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9854# endif
9855 offNative++;
9856 }
9857#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9858 }
9859 }
9860
9861#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9862 /* Cleanup. */
9863 cs_close(&hDisasm);
9864#endif
9865}
9866
9867
9868/** Emit alignment padding between labels / functions. */
9869DECL_INLINE_THROW(uint32_t)
9870iemNativeRecompileEmitAlignmentPadding(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fAlignMask)
9871{
9872 if (off & fAlignMask)
9873 {
9874 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, fAlignMask + 1);
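        /* Fill the gap with breakpoint instructions (int3 on AMD64, BRK on ARM64) so that
           a stray jump into the padding traps immediately. */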
9875 while (off & fAlignMask)
9876#if defined(RT_ARCH_AMD64)
9877 pCodeBuf[off++] = 0xcc;
9878#elif defined(RT_ARCH_ARM64)
9879 pCodeBuf[off++] = Armv8A64MkInstrBrk(0xcccc);
9880#else
9881# error "port me"
9882#endif
9883 }
9884 return off;
9885}
9886
9887
9888/**
9889 * Called when a new chunk is allocated, to emit the common per-chunk code.
9890 *
9891 * Allocates a per-chunk context directly from the chunk itself and places the
9892 * common code there.
9893 *
9894 * @returns VBox status code.
9895 * @param pVCpu The cross context virtual CPU structure of the calling
9896 * thread.
9897 * @param idxChunk The index of the chunk being added and requiring a
9898 * common code context.
9899 * @param ppCtx Where to return the pointer to the chunk context start.
9900 */
9901DECLHIDDEN(int) iemNativeRecompileAttachExecMemChunkCtx(PVMCPU pVCpu, uint32_t idxChunk, PCIEMNATIVEPERCHUNKCTX *ppCtx)
9902{
9903 *ppCtx = NULL;
9904
9905 /*
9906 * Allocate a new recompiler state (since we're likely to be called while
9907 * the default one is fully loaded already with a recompiled TB).
9908 *
9909 * This is a bit of overkill, but this isn't a frequently used code path.
9910 */
9911 PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, NULL);
9912 AssertReturn(pReNative, VERR_NO_MEMORY);
9913
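    /* Alignment mask in IEMNATIVEINSTR units: 15 one-byte entries give 16 byte alignment
       on AMD64, while 31/4 == 7 four-byte entries give 32 byte alignment on ARM64. */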
9914#if defined(RT_ARCH_AMD64)
9915 uint32_t const fAlignMask = 15;
9916#elif defined(RT_ARCH_ARM64)
9917 uint32_t const fAlignMask = 31 / 4;
9918#else
9919# error "port me"
9920#endif
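    /* Offsets (in IEMNATIVEINSTR units) of the common code emitted for each TB exit label;
       these are turned into the pCtx->apExitLabels pointers once the code has been copied
       into the chunk. */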
9921 uint32_t aoffLabels[kIemNativeLabelType_LastTbExit + 1] = {0};
9922 int rc = VINF_SUCCESS;
9923 uint32_t off = 0;
9924
9925 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9926 {
9927 /*
9928 * Emit the epilog code.
9929 */
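        /* ReturnSuccess: zero the return register (VINF_SUCCESS) and fall into the common
           epilog.  offReturnWithStatus marks the epilog entry that keeps whatever status is
           already in the return register; the non-epilog tail labels below jump there as a
           safety net. */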
9930 aoffLabels[kIemNativeLabelType_ReturnSuccess] = off;
9931 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
9932 uint32_t const offReturnWithStatus = off;
9933 off = iemNativeEmitCoreEpilog(pReNative, off);
9934
9935 /*
9936         * Generate special jump labels. All of these get a copy of the epilog code.
9937 */
9938 static struct
9939 {
9940 IEMNATIVELABELTYPE enmExitReason;
9941 uint32_t (*pfnEmitCore)(PIEMRECOMPILERSTATE pReNative, uint32_t off);
9942 } const s_aSpecialWithEpilogs[] =
9943 {
9944 { kIemNativeLabelType_NonZeroRetOrPassUp, iemNativeEmitCoreRcFiddling },
9945 { kIemNativeLabelType_ReturnBreak, iemNativeEmitCoreReturnBreak },
9946 { kIemNativeLabelType_ReturnBreakFF, iemNativeEmitCoreReturnBreakFF },
9947 { kIemNativeLabelType_ReturnWithFlags, iemNativeEmitCoreReturnWithFlags },
9948 };
9949 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSpecialWithEpilogs); i++)
9950 {
9951 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9952 Assert(aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] == 0);
9953 aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] = off;
9954 off = s_aSpecialWithEpilogs[i].pfnEmitCore(pReNative, off);
9955 off = iemNativeEmitCoreEpilog(pReNative, off);
9956 }
9957
9958 /*
9959 * Do what iemNativeEmitReturnBreakViaLookup does.
9960 */
9961 static struct
9962 {
9963 IEMNATIVELABELTYPE enmExitReason;
9964 uintptr_t pfnHelper;
9965 } const s_aViaLookup[] =
9966 {
9967 { kIemNativeLabelType_ReturnBreakViaLookup,
9968 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/> },
9969 { kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
9970 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/> },
9971 { kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
9972 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/> },
9973 { kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
9974 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/> },
9975 };
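        /* offReturnBreak refers to the ReturnBreak code emitted above; the via-lookup
           stubs presumably branch there when the lookup doesn't yield a TB to continue
           with. */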
9976 uint32_t const offReturnBreak = aoffLabels[kIemNativeLabelType_ReturnBreak]; Assert(offReturnBreak != 0);
9977 for (uint32_t i = 0; i < RT_ELEMENTS(s_aViaLookup); i++)
9978 {
9979 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9980 Assert(aoffLabels[s_aViaLookup[i].enmExitReason] == 0);
9981 aoffLabels[s_aViaLookup[i].enmExitReason] = off;
9982 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, s_aViaLookup[i].pfnHelper);
9983 }
9984
9985 /*
9986         * Generate simple TB tail labels that just call a helper with a pVCpu
9987         * arg and either return or longjmp/throw a non-zero status.
9988 */
9989 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9990 static struct
9991 {
9992 IEMNATIVELABELTYPE enmExitReason;
9993 bool fWithEpilog;
9994 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9995 } const s_aSimpleTailLabels[] =
9996 {
9997 { kIemNativeLabelType_RaiseDe, false, iemNativeHlpExecRaiseDe },
9998 { kIemNativeLabelType_RaiseUd, false, iemNativeHlpExecRaiseUd },
9999 { kIemNativeLabelType_RaiseSseRelated, false, iemNativeHlpExecRaiseSseRelated },
10000 { kIemNativeLabelType_RaiseAvxRelated, false, iemNativeHlpExecRaiseAvxRelated },
10001 { kIemNativeLabelType_RaiseSseAvxFpRelated, false, iemNativeHlpExecRaiseSseAvxFpRelated },
10002 { kIemNativeLabelType_RaiseNm, false, iemNativeHlpExecRaiseNm },
10003 { kIemNativeLabelType_RaiseGp0, false, iemNativeHlpExecRaiseGp0 },
10004 { kIemNativeLabelType_RaiseMf, false, iemNativeHlpExecRaiseMf },
10005 { kIemNativeLabelType_RaiseXf, false, iemNativeHlpExecRaiseXf },
10006 { kIemNativeLabelType_ObsoleteTb, true, iemNativeHlpObsoleteTb },
10007 { kIemNativeLabelType_NeedCsLimChecking, true, iemNativeHlpNeedCsLimChecking },
10008 { kIemNativeLabelType_CheckBranchMiss, true, iemNativeHlpCheckBranchMiss },
10009 };
10010 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSimpleTailLabels); i++)
10011 {
10012 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
10013 Assert(!aoffLabels[s_aSimpleTailLabels[i].enmExitReason]);
10014 aoffLabels[s_aSimpleTailLabels[i].enmExitReason] = off;
10015
10016 /* int pfnCallback(PVMCPUCC pVCpu) */
10017 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10018 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)s_aSimpleTailLabels[i].pfnCallback);
10019
10020 /* If the callback is supposed to return with a status code we inline the epilog
10021 sequence for better speed. Otherwise, if the callback shouldn't return because
10022 it throws/longjmps, we just jump to the return sequence to be on the safe side. */
10023 if (s_aSimpleTailLabels[i].fWithEpilog)
10024 off = iemNativeEmitCoreEpilog(pReNative, off);
10025 else
10026 {
10027#ifdef VBOX_STRICT
10028 off = iemNativeEmitBrk(pReNative, off, 0x2201);
10029#endif
10030 off = iemNativeEmitJmpToFixed(pReNative, off, offReturnWithStatus);
10031 }
10032 }
10033
10034
10035#ifdef VBOX_STRICT
10036    /* Make sure we've generated code for all labels. */
10037 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(aoffLabels); i++)
10038 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
10039#endif
10040 }
10041 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10042 {
10043 Log(("iemNativeRecompileAttachExecMemChunkCtx: Caught %Rrc while recompiling!\n", rc));
10044 iemNativeTerm(pReNative);
10045 return rc;
10046 }
10047 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10048
10049 /*
10050 * Allocate memory for the context (first) and the common code (last).
10051 */
10052 PIEMNATIVEPERCHUNKCTX pCtx;
10053 uint32_t const cbCtx = RT_ALIGN_32(sizeof(*pCtx), 64);
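    /* The context size is rounded up to 64 bytes, presumably so the common code following
       it starts on a cache-line boundary. */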
10054 uint32_t const cbCode = off * sizeof(IEMNATIVEINSTR);
10055 PIEMNATIVEINSTR paFinalCommonCodeRx = NULL;
10056 pCtx = (PIEMNATIVEPERCHUNKCTX)iemExecMemAllocatorAllocFromChunk(pVCpu, idxChunk, cbCtx + cbCode, &paFinalCommonCodeRx);
10057 AssertLogRelMsgReturnStmt(pCtx, ("cbCtx=%#x cbCode=%#x idxChunk=%#x\n", cbCtx, cbCode, idxChunk),
10058 iemNativeTerm(pReNative), VERR_OUT_OF_RESOURCES);
10059
10060 /*
10061 * Copy over the generated code.
10062 * There should be no fixups or labels defined here.
10063 */
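    /* pCtx is the writable mapping, paFinalCommonCodeRx presumably the read+execute alias
       of the same allocation; skip the context header (cbCtx) in both before copying. */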
10064 paFinalCommonCodeRx = (PIEMNATIVEINSTR)((uintptr_t)paFinalCommonCodeRx + cbCtx);
10065 memcpy((PIEMNATIVEINSTR)((uintptr_t)pCtx + cbCtx), pReNative->pInstrBuf, cbCode);
10066
10067 Assert(pReNative->cFixups == 0);
10068 Assert(pReNative->cLabels == 0);
10069
10070 /*
10071 * Initialize the context.
10072 */
10073 AssertCompile(kIemNativeLabelType_Invalid == 0);
10074 AssertCompile(RT_ELEMENTS(pCtx->apExitLabels) == RT_ELEMENTS(aoffLabels));
10075 pCtx->apExitLabels[kIemNativeLabelType_Invalid] = 0;
10076 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(pCtx->apExitLabels); i++)
10077 {
10078 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
10079 pCtx->apExitLabels[i] = &paFinalCommonCodeRx[aoffLabels[i]];
10080 Log10((" apExitLabels[%u]=%p %s\n", i, pCtx->apExitLabels[i], iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true)));
10081 }
10082
10083 iemExecMemAllocatorReadyForUse(pVCpu, pCtx, cbCtx + cbCode);
10084
10085 iemNativeTerm(pReNative);
10086 *ppCtx = pCtx;
10087 return VINF_SUCCESS;
10088}
10089
10090
10091/**
10092 * Recompiles the given threaded TB into a native one.
10093 *
10094 * In case of failure the translation block will be returned as-is.
10095 *
10096 * @returns pTb.
10097 * @param pVCpu The cross context virtual CPU structure of the calling
10098 * thread.
10099 * @param pTb The threaded translation to recompile to native.
10100 */
10101IEM_DECL_MSC_GUARD_IGNORE DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
10102{
10103#if 0 /* For profiling the native recompiler code. */
10104l_profile_again:
10105#endif
10106 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
10107
10108 /*
10109     * The first time thru, we allocate the recompiler state and save it;
10110     * all the other times we'll just reuse the saved one after a quick reset.
10111 */
10112 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
10113 if (RT_LIKELY(pReNative))
10114 iemNativeReInit(pReNative, pTb);
10115 else
10116 {
10117 pReNative = iemNativeInit(pVCpu, pTb);
10118 AssertReturn(pReNative, pTb);
10119 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative; /* save it */
10120 }
10121
10122#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10123 /*
10124 * First do liveness analysis. This is done backwards.
10125 */
10126 {
10127 uint32_t idxCall = pTb->Thrd.cCalls;
10128 if (idxCall <= pReNative->cLivenessEntriesAlloc)
10129 { /* likely */ }
10130 else
10131 {
10132 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
10133 while (idxCall > cAlloc)
10134 cAlloc *= 2;
10135 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
10136 AssertReturn(pvNew, pTb);
10137 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
10138 pReNative->cLivenessEntriesAlloc = cAlloc;
10139 }
10140 AssertReturn(idxCall > 0, pTb);
10141 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
10142
10143 /* The initial (final) entry. */
10144 idxCall--;
10145 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
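        /* Each entry is produced from its successor by the per-function liveness callback,
           so the information flows from the end of the TB back towards its start. */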
10146
10147 /* Loop backwards thru the calls and fill in the other entries. */
10148 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
10149 while (idxCall > 0)
10150 {
10151 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
10152 Assert(pfnLiveness);
10153 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
10154 pCallEntry--;
10155 idxCall--;
10156 }
10157 }
10158#endif
10159
10160 /*
10161     * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10162     * so that we can abort if an error occurs.
10163 */
10164 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10165#ifdef LOG_ENABLED
10166 uint32_t const cCallsOrg = cCallsLeft;
10167#endif
10168 uint32_t off = 0;
10169 int rc = VINF_SUCCESS;
10170 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10171 {
10172 /*
10173 * Convert the calls to native code.
10174 */
10175#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10176 int32_t iGstInstr = -1;
10177#endif
10178#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10179 uint32_t cThreadedCalls = 0;
10180 uint32_t cRecompiledCalls = 0;
10181#endif
10182#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10183 uint32_t idxCurCall = 0;
10184#endif
10185 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10186 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10187 while (cCallsLeft-- > 0)
10188 {
10189 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10190#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10191 pReNative->idxCurCall = idxCurCall;
10192#endif
10193
10194#ifdef IEM_WITH_INTRA_TB_JUMPS
10195 /*
10196 * Define label for jump targets (currently only the first entry).
10197 */
10198 if (!(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET))
10199 { /* likely */ }
10200 else
10201 {
10202 iemNativeLabelCreate(pReNative, kIemNativeLabelType_LoopJumpTarget, off);
10203 Assert(idxCurCall == 0); /** @todo when jumping elsewhere, we have to save the register state. */
10204 }
10205#endif
10206
10207 /*
10208 * Debug info, assembly markup and statistics.
10209 */
10210#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10211 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10212 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10213#endif
10214#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10215 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10216 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10217 {
10218 if (iGstInstr < (int32_t)pTb->cInstructions)
10219 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10220 else
10221 Assert(iGstInstr == pTb->cInstructions);
10222 iGstInstr = pCallEntry->idxInstr;
10223 }
10224 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10225#endif
10226#if defined(VBOX_STRICT)
10227 off = iemNativeEmitMarker(pReNative, off,
10228 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
10229#endif
10230#if defined(VBOX_STRICT)
10231 iemNativeRegAssertSanity(pReNative);
10232#endif
10233#ifdef VBOX_WITH_STATISTICS
10234 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10235#endif
10236
10237#if 0
10238 if ( pTb->GCPhysPc == 0x00000000000c1240
10239 && idxCurCall == 67)
10240 off = iemNativeEmitBrk(pReNative, off, 0xf000);
10241#endif
10242
10243 /*
10244 * Actual work.
10245 */
10246 Log2(("%u[%u]: %s%s (off=%#x)\n", idxCurCall, pCallEntry->idxInstr,
10247 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)", off));
10248 if (pfnRecom) /** @todo stats on this. */
10249 {
10250 off = pfnRecom(pReNative, off, pCallEntry);
10251 STAM_REL_STATS({cRecompiledCalls++;});
10252 }
10253 else
10254 {
10255 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10256 STAM_REL_STATS({cThreadedCalls++;});
10257 }
10258 Assert(off <= pReNative->cInstrBufAlloc);
10259 Assert(pReNative->cCondDepth == 0);
10260
10261#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10262 if (LogIs2Enabled())
10263 {
10264 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10265# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10266 static const char s_achState[] = "CUXI";
10267# else
10268 /* 0123 4567 89ab cdef */
10269 /* CCCC CCCC */
10270 /* WWWW WWWW */
10271 /* RR RR RR RR */
10272 /* P P P P P P P P */
10273 static const char s_achState[] = "UxRr" "WwMm" "CcQq" "KkNn";
10274# endif
10275
10276 char szGpr[17];
10277 for (unsigned i = 0; i < 16; i++)
10278 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10279 szGpr[16] = '\0';
10280
10281 char szSegBase[X86_SREG_COUNT + 1];
10282 char szSegLimit[X86_SREG_COUNT + 1];
10283 char szSegAttrib[X86_SREG_COUNT + 1];
10284 char szSegSel[X86_SREG_COUNT + 1];
10285 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10286 {
10287 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10288 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10289 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10290 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10291 }
10292 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10293 = szSegSel[X86_SREG_COUNT] = '\0';
10294
10295 char szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT + 1];
10296 for (unsigned i = 0; i < IEMLIVENESSBIT_IDX_EFL_COUNT; i++)
10297 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10298                szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT] = '\0';
10299
10300 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10301 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10302 }
10303#endif
10304
10305 /*
10306 * Advance.
10307 */
10308 pCallEntry++;
10309#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10310 idxCurCall++;
10311#endif
10312 }
10313
10314 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10315 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10316 if (!cThreadedCalls)
10317 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10318
10319 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, UINT32_MAX);
10320
10321#ifdef VBOX_WITH_STATISTICS
10322 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
10323#endif
10324
10325 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
10326 off = iemNativeRegFlushPendingWrites(pReNative, off);
10327
10328 /*
10329 * Jump to the common per-chunk epilog code.
10330 */
10331 //off = iemNativeEmitBrk(pReNative, off, 0x1227);
10332 off = iemNativeEmitTbExit<kIemNativeLabelType_ReturnSuccess, true, false>(pReNative, off);
10333
10334 /*
10335 * Generate tail labels with jumps to the common per-chunk code on non-x86 hosts.
10336 */
10337#ifndef RT_ARCH_AMD64
10338 Assert(!(pReNative->bmLabelTypes & ( RT_BIT_64(kIemNativeLabelType_ReturnSuccess)
10339 | RT_BIT_64(kIemNativeLabelType_Invalid) )));
10340 AssertCompile(kIemNativeLabelType_Invalid == 0);
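        /* (RT_BIT_64(LastTbExit + 1) - 2) selects all exit label bits except bit 0
           (Invalid).  Each pending label gets a placeholder branch here which the TB exit
           fixup pass further down patches to point at the per-chunk common code. */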
10341 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1U) - 2U);
10342 if (fTailLabels)
10343 {
10344 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, kIemNativeLabelType_LastTbExit + 1);
10345 do
10346 {
10347 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10348 fTailLabels &= ~RT_BIT_64(enmLabel);
10349
10350 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10351 AssertContinue(idxLabel != UINT32_MAX);
10352 iemNativeLabelDefine(pReNative, idxLabel, off);
10353
10354 iemNativeAddTbExitFixup(pReNative, off, enmLabel);
10355# ifdef RT_ARCH_ARM64
10356 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
10357# else
10358# error "port me"
10359# endif
10360 } while (fTailLabels);
10361 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10362 }
10363#else
10364 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1) - 1U))); /* Should not be used! */
10365#endif
10366 }
10367 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10368 {
10369 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10370 return pTb;
10371 }
10372 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10373 Assert(off <= pReNative->cInstrBufAlloc);
10374
10375 /*
10376     * Make sure all labels have been defined.
10377 */
10378 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10379#ifdef VBOX_STRICT
10380 uint32_t const cLabels = pReNative->cLabels;
10381 for (uint32_t i = 0; i < cLabels; i++)
10382 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10383#endif
10384
10385#if 0 /* For profiling the native recompiler code. */
10386 if (pTb->Thrd.cCalls >= 136)
10387 {
10388 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10389 goto l_profile_again;
10390 }
10391#endif
10392
10393 /*
10394 * Allocate executable memory, copy over the code we've generated.
10395 */
10396 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10397 if (pTbAllocator->pDelayedFreeHead)
10398 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10399
10400 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
10401 PCIEMNATIVEPERCHUNKCTX pCtx = NULL;
10402 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10403 &paFinalInstrBufRx, &pCtx);
10404
10405 AssertReturn(paFinalInstrBuf, pTb);
10406 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10407
10408 /*
10409 * Apply fixups.
10410 */
10411 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10412 uint32_t const cFixups = pReNative->cFixups;
10413 for (uint32_t i = 0; i < cFixups; i++)
10414 {
10415 Assert(paFixups[i].off < off);
10416 Assert(paFixups[i].idxLabel < cLabels);
10417 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10418 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10419 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10420 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10421 switch (paFixups[i].enmType)
10422 {
10423#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10424 case kIemNativeFixupType_Rel32:
10425 Assert(paFixups[i].off + 4 <= off);
10426 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10427 continue;
10428
10429#elif defined(RT_ARCH_ARM64)
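            /* ARM64 branch displacements are encoded in 32-bit instruction units.  The
               masks below correspond to B/BL (imm26 at bit 0), B.cond/CBZ/CBNZ (imm19 at
               bit 5) and TBZ/TBNZ (imm14 at bit 5). */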
10430 case kIemNativeFixupType_RelImm26At0:
10431 {
10432 Assert(paFixups[i].off < off);
10433 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10434 Assert(offDisp >= -33554432 && offDisp < 33554432);
10435 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10436 continue;
10437 }
10438
10439 case kIemNativeFixupType_RelImm19At5:
10440 {
10441 Assert(paFixups[i].off < off);
10442 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10443 Assert(offDisp >= -262144 && offDisp < 262144);
10444 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10445 continue;
10446 }
10447
10448 case kIemNativeFixupType_RelImm14At5:
10449 {
10450 Assert(paFixups[i].off < off);
10451 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10452 Assert(offDisp >= -8192 && offDisp < 8192);
10453 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10454 continue;
10455 }
10456
10457#endif
10458 case kIemNativeFixupType_Invalid:
10459 case kIemNativeFixupType_End:
10460 break;
10461 }
10462 AssertFailed();
10463 }
10464
10465 /*
10466 * Apply TB exit fixups.
10467 */
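    /* On AMD64 the rel32 is relative to the first byte after the 32-bit displacement (the
       end of the jump instruction), hence the +4; on ARM64 the imm26 is relative to the
       branch instruction itself and counted in 4-byte units. */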
10468 PIEMNATIVEEXITFIXUP const paTbExitFixups = pReNative->paTbExitFixups;
10469 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
10470 for (uint32_t i = 0; i < cTbExitFixups; i++)
10471 {
10472 Assert(paTbExitFixups[i].off < off);
10473 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(paTbExitFixups[i].enmExitReason));
10474 RTPTRUNION const Ptr = { &paFinalInstrBuf[paTbExitFixups[i].off] };
10475
10476#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10477 Assert(paTbExitFixups[i].off + 4 <= off);
10478 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off + 4];
10479 Assert(offDisp >= INT32_MIN && offDisp <= INT32_MAX);
10480 *Ptr.pi32 = (int32_t)offDisp;
10481
10482#elif defined(RT_ARCH_ARM64)
10483 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off];
10484 Assert(offDisp >= -33554432 && offDisp < 33554432);
10485 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10486
10487#else
10488# error "Port me!"
10489#endif
10490 }
10491
10492 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
10493 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10494
10495 /*
10496 * Convert the translation block.
10497 */
10498 RTMemFree(pTb->Thrd.paCalls);
10499 pTb->Native.paInstructions = paFinalInstrBufRx;
10500 pTb->Native.cInstructions = off;
10501 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10502#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10503 pReNative->pDbgInfo->FlatPc = pTb->FlatPc;
10504    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10505 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10506#endif
10507
10508 Assert(pTbAllocator->cThreadedTbs > 0);
10509 pTbAllocator->cThreadedTbs -= 1;
10510 pTbAllocator->cNativeTbs += 1;
10511 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10512
10513#ifdef LOG_ENABLED
10514 /*
10515 * Disassemble to the log if enabled.
10516 */
10517 if (LogIs3Enabled())
10518 {
10519 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10520 iemNativeDisassembleTb(pVCpu, pTb, DBGFR3InfoLogHlp());
10521# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10522 RTLogFlush(NULL);
10523# endif
10524 }
10525#endif
10526 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10527
10528 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10529 return pTb;
10530}
10531