VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@106315

Last change on this file: r106315, checked in by vboxsync on 2024-10-15

VMM/IEM: Reduce the number of parameters passed to the shadowed-guest-register allocator. bugref:10720

1/* $Id: IEMAllN8veRecompiler.cpp 106315 2024-10-15 01:05:43Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : Delayed PC updating.
11 * - Level 5 (Log5) : Postponed and skipped EFLAGS calculations.
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
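/*
 * Illustration: the levels above map to the LogN macros from VBox/log.h under this
 * log group, e.g. a level-12 register allocator trace might look like
 *
 *      Log12(("regalloc: guest reg %u -> host reg %u\n", enmGstReg, idxHstReg));
 *
 * where the variable names are hypothetical. Assuming the usual VBox logger syntax
 * and that the group string matches LOG_GROUP_IEM_RE_NATIVE, such statements would
 * be enabled at runtime with something like VBOX_LOG=iem_re_native.e.l12.
 */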
20
21/*
22 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include <VBox/vmm/tm.h>
55#include "IEMInternal.h"
56#include <VBox/vmm/vmcc.h>
57#include <VBox/log.h>
58#include <VBox/err.h>
59#include <VBox/dis.h>
60#include <VBox/param.h>
61#include <iprt/assert.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
71# include "/opt/local/include/capstone/capstone.h"
72#endif
73
74#include "IEMInline.h"
75#include "IEMThreadedFunctions.h"
76#include "IEMN8veRecompiler.h"
77#include "IEMN8veRecompilerEmit.h"
78#include "IEMN8veRecompilerTlbLookup.h"
79#include "IEMNativeFunctions.h"
80#include "target-x86/IEMAllN8veEmit-x86.h"
81
82
83/*
84 * Narrow down the configs here to avoid wasting time on unused configs.
85 * Note! Same checks in IEMAllThrdRecompiler.cpp.
86 */
87
88#ifndef IEM_WITH_CODE_TLB
89# error The code TLB must be enabled for the recompiler.
90#endif
91
92#ifndef IEM_WITH_DATA_TLB
93# error The data TLB must be enabled for the recompiler.
94#endif
95
96#ifndef IEM_WITH_SETJMP
97# error The setjmp approach must be enabled for the recompiler.
98#endif
99
100/** @todo eliminate this clang build hack. */
101#if RT_CLANG_PREREQ(4, 0)
102# pragma GCC diagnostic ignored "-Wunused-function"
103#endif
104
105
106/*********************************************************************************************************************************
107* Internal Functions *
108*********************************************************************************************************************************/
109#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
110static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
111#endif
112DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
113DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
114 IEMNATIVEGSTREG enmGstReg, uint32_t off);
115DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
116static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode = false);
117
118
119
120/*********************************************************************************************************************************
121* Native Recompilation *
122*********************************************************************************************************************************/
123
124
125/**
126 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
127 */
128IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
129{
130 pVCpu->iem.s.cInstructions += idxInstr;
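 /* Note: VINF_IEM_REEXEC_BREAK just means "return to the execution loop" and is not
 an error condition, so it is folded into VINF_SUCCESS before the regular status
 code fiddling is applied. */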
131 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
132}
133
134
135/**
136 * Helper for iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb.
137 */
138DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
139{
140 uint64_t fCpu = pVCpu->fLocalForcedActions;
141 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
142 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
143 | VMCPU_FF_TLB_FLUSH
144 | VMCPU_FF_UNHALT );
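 /* Note (assumption from the check below): the flags masked out above do not, by
 themselves, require leaving native TB execution here; only the remaining
 force-action flags count as pending interrupt/FF work. */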
145 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
146 if (RT_LIKELY( ( !fCpu
147 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
148 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
149 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
150 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
151 return false;
152 return true;
153}
154
155
156/**
157 * Used by TB code for direct TB linking: looks up the next TB via the given lookup table entry when the physical PC is already known, returning the address of its native code or zero if no suitable TB was found.
158 */
159template <bool const a_fWithIrqCheck>
160IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
161 uint32_t fFlags, RTGCPHYS GCPhysPc))
162{
163 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
164 Assert(idxTbLookup < pTb->cTbLookupEntries);
165 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
166#if 1
167 PIEMTB const pNewTb = *ppNewTb;
168 if (pNewTb)
169 {
170# ifdef VBOX_STRICT
171 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
172 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
173 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
174 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
175 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
176 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
177# endif
178 if (pNewTb->GCPhysPc == GCPhysPc)
179 {
180# ifdef VBOX_STRICT
181 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
182 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
183 fAssertFlags |= IEMTB_F_INHIBIT_SHADOW;
184 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
185 fAssertFlags |= IEMTB_F_INHIBIT_NMI;
186# if 1 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. */
187 Assert(IEM_F_MODE_X86_IS_FLAT(fFlags));
188# else
189 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
190 {
191 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
192 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
193 fAssertFlags |= IEMTB_F_CS_LIM_CHECKS;
194 }
195# endif
196 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
197 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
198 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
199#endif
200
201 /*
202 * Check them + type.
203 */
204 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
205 {
206 /*
207 * Check for interrupts and stuff.
208 */
209 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
210 * The main problems are the statistics and, to some degree, the logging. :/ */
211 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
212 {
213 /* Do polling. */
214 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
215 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
216 {
217 /*
218 * Success. Update statistics and switch to the next TB.
219 */
220 if (a_fWithIrqCheck)
221 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
222 else
223 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
224
225 pNewTb->cUsed += 1;
226 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
227 pVCpu->iem.s.pCurTbR3 = pNewTb;
228 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
229 pVCpu->iem.s.cTbExecNative += 1;
230 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
231 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
232 return (uintptr_t)pNewTb->Native.paInstructions;
233 }
234 }
235 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
236 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
237 }
238 else
239 {
240 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
241 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
242 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
243 }
244 }
245 else
246 {
247 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
248 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
249 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
250 }
251 }
252 else
253 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
254#else
255 NOREF(GCPhysPc);
256#endif
257
258 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
259 return 0;
260}
261
262
263/**
264 * Used by TB code for direct TB linking: like iemNativeHlpReturnBreakViaLookup, but first translates the current RIP via the code TLB to determine the physical PC.
265 */
266template <bool const a_fWithIrqCheck>
267IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
268{
269 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
270 Assert(idxTbLookup < pTb->cTbLookupEntries);
271 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
272#if 1
273 PIEMTB const pNewTb = *ppNewTb;
274 if (pNewTb)
275 {
276 /*
277 * Calculate the flags for the next TB and check if they match.
278 */
279 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
280 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
281 { /* likely */ }
282 else
283 {
284 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
285 fFlags |= IEMTB_F_INHIBIT_SHADOW;
286 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
287 fFlags |= IEMTB_F_INHIBIT_NMI;
288 }
289 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
290 {
291 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
292 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
293 { /* likely */ }
294 else
295 fFlags |= IEMTB_F_CS_LIM_CHECKS;
296 }
297 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
298
299 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
300 {
301 /*
302 * Do the TLB lookup for flat RIP and compare the result with the next TB.
303 *
304 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
305 */
306 /* Calc the effective PC. */
307 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
308 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
309 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
310
311 /* Advance within the current buffer (PAGE) when possible. */
312 RTGCPHYS GCPhysPc;
313 uint64_t off;
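 /* Note: the check below relies on unsigned wraparound: when uPc is below
 uInstrBufPc the subtraction yields a huge value that fails the range test, so a
 single compare covers both "before the buffer" and "past its end". */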
314 if ( pVCpu->iem.s.pbInstrBuf
315 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
316 {
317 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
318 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
319 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
320 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
321 else
322 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
323 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
324 }
325 else
326 {
327 pVCpu->iem.s.pbInstrBuf = NULL;
328 pVCpu->iem.s.offCurInstrStart = 0;
329 pVCpu->iem.s.offInstrNextByte = 0;
330 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
331 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
332 }
333
334 if (pNewTb->GCPhysPc == GCPhysPc)
335 {
336 /*
337 * Check for interrupts and stuff.
338 */
339 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
340 * The main problems are the statistics and, to some degree, the logging. :/ */
341 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
342 {
343 /* Do polling. */
344 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
345 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
346 {
347 /*
348 * Success. Update statistics and switch to the next TB.
349 */
350 if (a_fWithIrqCheck)
351 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
352 else
353 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
354
355 pNewTb->cUsed += 1;
356 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
357 pVCpu->iem.s.pCurTbR3 = pNewTb;
358 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
359 pVCpu->iem.s.cTbExecNative += 1;
360 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
361 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
362 return (uintptr_t)pNewTb->Native.paInstructions;
363 }
364 }
365 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
366 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
367 }
368 else
369 {
370 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
371 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
372 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
373 }
374 }
375 else
376 {
377 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
378 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
379 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
380 }
381 }
382 else
383 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
384#else
385 NOREF(fFlags);
386 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
387#endif
388
389 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
390 return 0;
391}
392
393
394/**
395 * Used by TB code when it wants to raise a \#DE.
396 */
397IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
398{
399 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
400 iemRaiseDivideErrorJmp(pVCpu);
401#ifndef _MSC_VER
402 return VINF_IEM_RAISED_XCPT; /* not reached */
403#endif
404}
405
406
407/**
408 * Used by TB code when it wants to raise a \#UD.
409 */
410IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
411{
412 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
413 iemRaiseUndefinedOpcodeJmp(pVCpu);
414#ifndef _MSC_VER
415 return VINF_IEM_RAISED_XCPT; /* not reached */
416#endif
417}
418
419
420/**
421 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
422 *
423 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
424 */
425IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
426{
427 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
428 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
429 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
430 iemRaiseUndefinedOpcodeJmp(pVCpu);
431 else
432 iemRaiseDeviceNotAvailableJmp(pVCpu);
433#ifndef _MSC_VER
434 return VINF_IEM_RAISED_XCPT; /* not reached */
435#endif
436}
437
438
439/**
440 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
441 *
442 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
443 */
444IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
445{
446 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
447 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
448 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
449 iemRaiseUndefinedOpcodeJmp(pVCpu);
450 else
451 iemRaiseDeviceNotAvailableJmp(pVCpu);
452#ifndef _MSC_VER
453 return VINF_IEM_RAISED_XCPT; /* not reached */
454#endif
455}
456
457
458/**
459 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
460 *
461 * See IEM_MC_CALL_AVX_XXX/IEM_MC_CALL_SSE_XXX.
462 */
463IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
464{
465 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
466 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
467 iemRaiseSimdFpExceptionJmp(pVCpu);
468 else
469 iemRaiseUndefinedOpcodeJmp(pVCpu);
470#ifndef _MSC_VER
471 return VINF_IEM_RAISED_XCPT; /* not reached */
472#endif
473}
474
475
476/**
477 * Used by TB code when it wants to raise a \#NM.
478 */
479IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
480{
481 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
482 iemRaiseDeviceNotAvailableJmp(pVCpu);
483#ifndef _MSC_VER
484 return VINF_IEM_RAISED_XCPT; /* not reached */
485#endif
486}
487
488
489/**
490 * Used by TB code when it wants to raise a \#GP(0).
491 */
492IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
493{
494 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
495 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
496#ifndef _MSC_VER
497 return VINF_IEM_RAISED_XCPT; /* not reached */
498#endif
499}
500
501
502/**
503 * Used by TB code when it wants to raise a \#MF.
504 */
505IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
506{
507 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
508 iemRaiseMathFaultJmp(pVCpu);
509#ifndef _MSC_VER
510 return VINF_IEM_RAISED_XCPT; /* not reached */
511#endif
512}
513
514
515/**
516 * Used by TB code when it wants to raise a \#XF.
517 */
518IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
519{
520 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
521 iemRaiseSimdFpExceptionJmp(pVCpu);
522#ifndef _MSC_VER
523 return VINF_IEM_RAISED_XCPT; /* not reached */
524#endif
525}
526
527
528/**
529 * Used by TB code when detecting opcode changes.
530 * @see iemThreadeFuncWorkerObsoleteTb
531 */
532IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
533{
534 /* We set fSafeToFree to false as we're being called in the context
535 of a TB callback function, which for native TBs means we cannot release
536 the executable memory until we've returned all the way back to iemTbExec,
537 as that return path goes via the native code generated for the TB. */
538 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
539 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
540 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
541 return VINF_IEM_REEXEC_BREAK;
542}
543
544
545/**
546 * Used by TB code when we need to switch to a TB with CS.LIM checking.
547 */
548IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
549{
550 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
551 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
552 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
553 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
554 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
555 return VINF_IEM_REEXEC_BREAK;
556}
557
558
559/**
560 * Used by TB code when we missed a PC check after a branch.
561 */
562IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
563{
564 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
565 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
566 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
567 pVCpu->iem.s.pbInstrBuf));
568 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
569 return VINF_IEM_REEXEC_BREAK;
570}
571
572
573
574/*********************************************************************************************************************************
575* Helpers: Segmented memory fetches and stores. *
576*********************************************************************************************************************************/
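/*
 * Note (assumption based on the pattern of the helpers below): when
 * IEMNATIVE_WITH_TLB_LOOKUP_FETCH / _STORE / _PUSH / _POP is defined, the generated TB
 * code performs the data TLB lookup inline and only calls these helpers on the slow
 * path, hence the *SafeJmp variants; otherwise the helpers perform the whole access
 * via the ordinary *Jmp routines.
 */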
577
578/**
579 * Used by TB code to load unsigned 8-bit data w/ segmentation.
580 */
581IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
582{
583#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
584 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
585#else
586 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
587#endif
588}
589
590
591/**
592 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
593 * to 16 bits.
594 */
595IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
596{
597#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
598 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
599#else
600 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
601#endif
602}
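/*
 * Illustration: the cast chain above first sign-extends the fetched byte to the target
 * width and then zero-extends the result to 64 bits, so a fetched value of 0x80 comes
 * back as 0x000000000000ff80 from the _Sx_U16 variant.
 */
#if 0 /* compile-time sanity sketch of the above, kept disabled */
AssertCompile((uint64_t)(uint16_t)(int16_t)(int8_t)UINT8_C(0x80) == UINT64_C(0xff80));
#endif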
603
604
605/**
606 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
607 * to 32 bits.
608 */
609IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
610{
611#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
612 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
613#else
614 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
615#endif
616}
617
618/**
619 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
620 * to 64 bits.
621 */
622IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
623{
624#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
625 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
626#else
627 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
628#endif
629}
630
631
632/**
633 * Used by TB code to load unsigned 16-bit data w/ segmentation.
634 */
635IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
636{
637#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
638 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
639#else
640 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
641#endif
642}
643
644
645/**
646 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
647 * to 32 bits.
648 */
649IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
650{
651#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
652 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
653#else
654 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
655#endif
656}
657
658
659/**
660 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
661 * to 64 bits.
662 */
663IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
664{
665#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
666 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
667#else
668 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
669#endif
670}
671
672
673/**
674 * Used by TB code to load unsigned 32-bit data w/ segmentation.
675 */
676IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
677{
678#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
679 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
680#else
681 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
682#endif
683}
684
685
686/**
687 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
688 * to 64 bits.
689 */
690IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
691{
692#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
693 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
694#else
695 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
696#endif
697}
698
699
700/**
701 * Used by TB code to load unsigned 64-bit data w/ segmentation.
702 */
703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
704{
705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
706 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
707#else
708 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
709#endif
710}
711
712
713#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
714/**
715 * Used by TB code to load 128-bit data w/ segmentation.
716 */
717IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
718{
719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
720 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
721#else
722 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
723#endif
724}
725
726
727/**
728 * Used by TB code to load 128-bit data w/ segmentation, enforcing the SSE alignment check.
729 */
730IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
731{
732#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
733 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
734#else
735 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
736#endif
737}
738
739
740/**
741 * Used by TB code to load 128-bit data w/ segmentation, without an alignment check.
742 */
743IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
744{
745#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
746 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
747#else
748 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
749#endif
750}
751
752
753/**
754 * Used by TB code to load 256-bit data w/ segmentation, without an alignment check.
755 */
756IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
757{
758#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
759 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
760#else
761 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
762#endif
763}
764
765
766/**
767 * Used by TB code to load 256-bit data w/ segmentation, enforcing the AVX alignment check.
768 */
769IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
770{
771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
772 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
773#else
774 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
775#endif
776}
777#endif
778
779
780/**
781 * Used by TB code to store unsigned 8-bit data w/ segmentation.
782 */
783IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
784{
785#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
786 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
787#else
788 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
789#endif
790}
791
792
793/**
794 * Used by TB code to store unsigned 16-bit data w/ segmentation.
795 */
796IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
797{
798#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
799 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
800#else
801 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
802#endif
803}
804
805
806/**
807 * Used by TB code to store unsigned 32-bit data w/ segmentation.
808 */
809IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
810{
811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
812 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
813#else
814 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
815#endif
816}
817
818
819/**
820 * Used by TB code to store unsigned 64-bit data w/ segmentation.
821 */
822IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
823{
824#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
825 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
826#else
827 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
828#endif
829}
830
831
832#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
833/**
834 * Used by TB code to store unsigned 128-bit data w/ segmentation, enforcing the SSE alignment check.
835 */
836IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
837{
838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
839 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
840#else
841 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
842#endif
843}
844
845
846/**
847 * Used by TB code to store unsigned 128-bit data w/ segmentation, without an alignment check.
848 */
849IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
850{
851#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
852 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
853#else
854 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
855#endif
856}
857
858
859/**
860 * Used by TB code to store unsigned 256-bit data w/ segmentation, without an alignment check.
861 */
862IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
863{
864#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
865 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
866#else
867 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
868#endif
869}
870
871
872/**
873 * Used by TB code to store unsigned 256-bit data w/ segmentation, enforcing the AVX alignment check.
874 */
875IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
876{
877#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
878 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
879#else
880 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
881#endif
882}
883#endif
884
885
886
887/**
888 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
889 */
890IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
891{
892#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
893 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
894#else
895 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
896#endif
897}
898
899
900/**
901 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
902 */
903IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
904{
905#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
906 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
907#else
908 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
909#endif
910}
911
912
913/**
914 * Used by TB code to store a 32-bit selector value onto a generic stack.
915 *
916 * Intel CPUs don't write a whole dword, hence the special function.
917 */
918IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
919{
920#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
921 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
922#else
923 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
924#endif
925}
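/*
 * Note: with a 32-bit operand size, recent Intel CPUs write only the low 16 bits of
 * the stack slot when pushing a segment register and leave the upper 16 bits
 * untouched, which is why the SReg stores get dedicated helpers here and in the
 * flat-stack section below.
 */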
926
927
928/**
929 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
930 */
931IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
932{
933#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
934 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
935#else
936 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
937#endif
938}
939
940
941/**
942 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
943 */
944IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
945{
946#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
947 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
948#else
949 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
950#endif
951}
952
953
954/**
955 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
956 */
957IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
958{
959#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
960 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
961#else
962 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
963#endif
964}
965
966
967/**
968 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
969 */
970IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
971{
972#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
973 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
974#else
975 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
976#endif
977}
978
979
980
981/*********************************************************************************************************************************
982* Helpers: Flat memory fetches and stores. *
983*********************************************************************************************************************************/
984
985/**
986 * Used by TB code to load unsigned 8-bit data w/ flat address.
987 * @note Zero extending the value to 64-bit to simplify assembly.
988 */
989IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
990{
991#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
992 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
993#else
994 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
995#endif
996}
997
998
999/**
1000 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1001 * to 16 bits.
1002 * @note Zero extending the value to 64-bit to simplify assembly.
1003 */
1004IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1005{
1006#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1007 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1008#else
1009 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1010#endif
1011}
1012
1013
1014/**
1015 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1016 * to 32 bits.
1017 * @note Zero extending the value to 64-bit to simplify assembly.
1018 */
1019IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1020{
1021#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1022 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1023#else
1024 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1025#endif
1026}
1027
1028
1029/**
1030 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1031 * to 64 bits.
1032 */
1033IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1034{
1035#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1036 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1037#else
1038 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1039#endif
1040}
1041
1042
1043/**
1044 * Used by TB code to load unsigned 16-bit data w/ flat address.
1045 * @note Zero extending the value to 64-bit to simplify assembly.
1046 */
1047IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1048{
1049#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1050 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1051#else
1052 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1053#endif
1054}
1055
1056
1057/**
1058 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1059 * to 32 bits.
1060 * @note Zero extending the value to 64-bit to simplify assembly.
1061 */
1062IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1063{
1064#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1065 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1066#else
1067 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1068#endif
1069}
1070
1071
1072/**
1073 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1074 * to 64 bits.
1075 * @note Zero extending the value to 64-bit to simplify assembly.
1076 */
1077IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1078{
1079#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1080 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1081#else
1082 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1083#endif
1084}
1085
1086
1087/**
1088 * Used by TB code to load unsigned 32-bit data w/ flat address.
1089 * @note Zero extending the value to 64-bit to simplify assembly.
1090 */
1091IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1092{
1093#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1094 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1095#else
1096 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1097#endif
1098}
1099
1100
1101/**
1102 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1103 * to 64 bits.
1104 * @note Zero extending the value to 64-bit to simplify assembly.
1105 */
1106IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1107{
1108#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1109 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1110#else
1111 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1112#endif
1113}
1114
1115
1116/**
1117 * Used by TB code to load unsigned 64-bit data w/ flat address.
1118 */
1119IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1120{
1121#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1122 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1123#else
1124 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1125#endif
1126}
1127
1128
1129#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1130/**
1131 * Used by TB code to load unsigned 128-bit data w/ flat address.
1132 */
1133IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1134{
1135#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1136 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1137#else
1138 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1139#endif
1140}
1141
1142
1143/**
1144 * Used by TB code to load unsigned 128-bit data w/ flat address, enforcing the SSE alignment check.
1145 */
1146IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1147{
1148#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1149 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1150#else
1151 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1152#endif
1153}
1154
1155
1156/**
1157 * Used by TB code to load unsigned 128-bit data w/ flat address, without an alignment check.
1158 */
1159IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1160{
1161#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1162 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1163#else
1164 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1165#endif
1166}
1167
1168
1169/**
1170 * Used by TB code to load unsigned 256-bit data w/ flat address, without an alignment check.
1171 */
1172IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1173{
1174#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1175 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1176#else
1177 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1178#endif
1179}
1180
1181
1182/**
1183 * Used by TB code to load unsigned 256-bit data w/ flat address, enforcing the AVX alignment check.
1184 */
1185IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1186{
1187#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1188 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1189#else
1190 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1191#endif
1192}
1193#endif
1194
1195
1196/**
1197 * Used by TB code to store unsigned 8-bit data w/ flat address.
1198 */
1199IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1200{
1201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1202 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1203#else
1204 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1205#endif
1206}
1207
1208
1209/**
1210 * Used by TB code to store unsigned 16-bit data w/ flat address.
1211 */
1212IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1213{
1214#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1215 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1216#else
1217 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1218#endif
1219}
1220
1221
1222/**
1223 * Used by TB code to store unsigned 32-bit data w/ flat address.
1224 */
1225IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1226{
1227#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1228 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1229#else
1230 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1231#endif
1232}
1233
1234
1235/**
1236 * Used by TB code to store unsigned 64-bit data w/ flat address.
1237 */
1238IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1239{
1240#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1241 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1242#else
1243 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1244#endif
1245}
1246
1247
1248#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1249/**
1250 * Used by TB code to store unsigned 128-bit data w/ flat address, enforcing the SSE alignment check.
1251 */
1252IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1253{
1254#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1255 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1256#else
1257 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1258#endif
1259}
1260
1261
1262/**
1263 * Used by TB code to store unsigned 128-bit data w/ flat address, without an alignment check.
1264 */
1265IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1266{
1267#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1268 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1269#else
1270 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1271#endif
1272}
1273
1274
1275/**
1276 * Used by TB code to store unsigned 256-bit data w/ flat address, without an alignment check.
1277 */
1278IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1279{
1280#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1281 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1282#else
1283 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1284#endif
1285}
1286
1287
1288/**
1289 * Used by TB code to store unsigned 256-bit data w/ flat address, enforcing the AVX alignment check.
1290 */
1291IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1292{
1293#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1294 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1295#else
1296 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1297#endif
1298}
1299#endif
1300
1301
1302
1303/**
1304 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1305 */
1306IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1307{
1308#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1309 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1310#else
1311 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1312#endif
1313}
1314
1315
1316/**
1317 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1318 */
1319IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1320{
1321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1322 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1323#else
1324 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1325#endif
1326}
1327
1328
1329/**
1330 * Used by TB code to store a segment selector value onto a flat stack.
1331 *
1332 * Intel CPUs don't write a whole dword, hence the special function.
1333 */
1334IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1335{
1336#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1337 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1338#else
1339 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1340#endif
1341}
1342
1343
1344/**
1345 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1346 */
1347IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1348{
1349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1350 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1351#else
1352 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1353#endif
1354}
1355
1356
1357/**
1358 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1359 */
1360IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1361{
1362#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1363 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1364#else
1365 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1366#endif
1367}
1368
1369
1370/**
1371 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1372 */
1373IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1374{
1375#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1376 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1377#else
1378 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1379#endif
1380}
1381
1382
1383/**
1384 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1385 */
1386IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1387{
1388#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1389 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1390#else
1391 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1392#endif
1393}
1394
1395
1396
1397/*********************************************************************************************************************************
1398* Helpers: Segmented memory mapping. *
1399*********************************************************************************************************************************/
1400
1401/**
1402 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1403 * segmentation.
1404 */
1405IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1406 RTGCPTR GCPtrMem, uint8_t iSegReg))
1407{
1408#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1409 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1410#else
1411 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1412#endif
1413}
1414
1415
1416/**
1417 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1418 */
1419IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1420 RTGCPTR GCPtrMem, uint8_t iSegReg))
1421{
1422#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1423 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1424#else
1425 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1426#endif
1427}
1428
1429
1430/**
1431 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1432 */
1433IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1434 RTGCPTR GCPtrMem, uint8_t iSegReg))
1435{
1436#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1437 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1438#else
1439 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1440#endif
1441}
1442
1443
1444/**
1445 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1446 */
1447IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1448 RTGCPTR GCPtrMem, uint8_t iSegReg))
1449{
1450#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1451 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1452#else
1453 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1454#endif
1455}
1456
1457
1458/**
1459 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1460 * segmentation.
1461 */
1462IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1463 RTGCPTR GCPtrMem, uint8_t iSegReg))
1464{
1465#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1466 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1467#else
1468 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1469#endif
1470}
1471
1472
1473/**
1474 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1475 */
1476IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1477 RTGCPTR GCPtrMem, uint8_t iSegReg))
1478{
1479#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1480 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1481#else
1482 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1483#endif
1484}
1485
1486
1487/**
1488 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1489 */
1490IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1491 RTGCPTR GCPtrMem, uint8_t iSegReg))
1492{
1493#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1494 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1495#else
1496 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1497#endif
1498}
1499
1500
1501/**
1502 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1503 */
1504IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1505 RTGCPTR GCPtrMem, uint8_t iSegReg))
1506{
1507#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1508 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1509#else
1510 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1511#endif
1512}
1513
1514
1515/**
1516 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1517 * segmentation.
1518 */
1519IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1520 RTGCPTR GCPtrMem, uint8_t iSegReg))
1521{
1522#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1523 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1524#else
1525 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1526#endif
1527}
1528
1529
1530/**
1531 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1532 */
1533IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1534 RTGCPTR GCPtrMem, uint8_t iSegReg))
1535{
1536#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1537 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1538#else
1539 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1540#endif
1541}
1542
1543
1544/**
1545 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1546 */
1547IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1548 RTGCPTR GCPtrMem, uint8_t iSegReg))
1549{
1550#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1551 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1552#else
1553 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1554#endif
1555}
1556
1557
1558/**
1559 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1560 */
1561IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1562 RTGCPTR GCPtrMem, uint8_t iSegReg))
1563{
1564#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1565 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1566#else
1567 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1568#endif
1569}
1570
1571
1572/**
1573 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1574 * segmentation.
1575 */
1576IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1577 RTGCPTR GCPtrMem, uint8_t iSegReg))
1578{
1579#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1580 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1581#else
1582 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1583#endif
1584}
1585
1586
1587/**
1588 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1591 RTGCPTR GCPtrMem, uint8_t iSegReg))
1592{
1593#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1594 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1595#else
1596 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1597#endif
1598}
1599
1600
1601/**
1602 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1603 */
1604IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1605 RTGCPTR GCPtrMem, uint8_t iSegReg))
1606{
1607#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1608 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1609#else
1610 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1611#endif
1612}
1613
1614
1615/**
1616 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1619 RTGCPTR GCPtrMem, uint8_t iSegReg))
1620{
1621#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1622 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1623#else
1624 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1625#endif
1626}
1627
1628
1629/**
1630 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1633 RTGCPTR GCPtrMem, uint8_t iSegReg))
1634{
1635#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1636 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1637#else
1638 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1639#endif
1640}
1641
1642
1643/**
1644 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1645 */
1646IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1647 RTGCPTR GCPtrMem, uint8_t iSegReg))
1648{
1649#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1650 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1651#else
1652 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1653#endif
1654}
1655
1656
1657/**
1658 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1659 * segmentation.
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1662 RTGCPTR GCPtrMem, uint8_t iSegReg))
1663{
1664#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1665 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1666#else
1667 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1668#endif
1669}
1670
1671
1672/**
1673 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1674 */
1675IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1676 RTGCPTR GCPtrMem, uint8_t iSegReg))
1677{
1678#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1679 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1680#else
1681 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1682#endif
1683}
1684
1685
1686/**
1687 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1688 */
1689IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1690 RTGCPTR GCPtrMem, uint8_t iSegReg))
1691{
1692#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1693 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1694#else
1695 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1696#endif
1697}
1698
1699
1700/**
1701 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1702 */
1703IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1704 RTGCPTR GCPtrMem, uint8_t iSegReg))
1705{
1706#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1707 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1708#else
1709 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1710#endif
1711}
1712
1713
1714/*********************************************************************************************************************************
1715* Helpers: Flat memory mapping. *
1716*********************************************************************************************************************************/
1717
1718/**
1719 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1720 * address.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1723{
1724#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1725 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1726#else
1727 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1728#endif
1729}
1730
1731
1732/**
1733 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1734 */
1735IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1736{
1737#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1738 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1739#else
1740 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1741#endif
1742}
1743
1744
1745/**
1746 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1747 */
1748IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1749{
1750#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1751 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1752#else
1753 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1754#endif
1755}
1756
1757
1758/**
1759 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1760 */
1761IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1762{
1763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1764 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1765#else
1766 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1767#endif
1768}
1769
1770
1771/**
1772 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1773 * address.
1774 */
1775IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1776{
1777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1778 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1779#else
1780 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1781#endif
1782}
1783
1784
1785/**
1786 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1787 */
1788IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1789{
1790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1791 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1792#else
1793 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1794#endif
1795}
1796
1797
1798/**
1799 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1800 */
1801IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1802{
1803#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1804 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1805#else
1806 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1807#endif
1808}
1809
1810
1811/**
1812 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1813 */
1814IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1815{
1816#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1817 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1818#else
1819 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1820#endif
1821}
1822
1823
1824/**
1825 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1826 * address.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1829{
1830#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1831 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1832#else
1833 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1834#endif
1835}
1836
1837
1838/**
1839 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1840 */
1841IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1842{
1843#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1844 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1845#else
1846 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1847#endif
1848}
1849
1850
1851/**
1852 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1853 */
1854IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1855{
1856#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1857 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1858#else
1859 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1860#endif
1861}
1862
1863
1864/**
1865 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1870 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1871#else
1872 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1873#endif
1874}
1875
1876
1877/**
1878 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1879 * address.
1880 */
1881IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1882{
1883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1884 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1885#else
1886 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1887#endif
1888}
1889
1890
1891/**
1892 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1895{
1896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1897 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1898#else
1899 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1900#endif
1901}
1902
1903
1904/**
1905 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1906 */
1907IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1908{
1909#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1910 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1911#else
1912 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1913#endif
1914}
1915
1916
1917/**
1918 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1919 */
1920IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1921{
1922#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1923 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1924#else
1925 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1926#endif
1927}
1928
1929
1930/**
1931 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1932 */
1933IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1934{
1935#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1936 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1937#else
1938 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1939#endif
1940}
1941
1942
1943/**
1944 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1945 */
1946IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1947{
1948#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1949 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1950#else
1951 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1952#endif
1953}
1954
1955
1956/**
1957 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1958 * address.
1959 */
1960IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1961{
1962#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1963 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1964#else
1965 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1966#endif
1967}
1968
1969
1970/**
1971 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1974{
1975#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1976 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1977#else
1978 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1979#endif
1980}
1981
1982
1983/**
1984 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1985 */
1986IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1987{
1988#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1989 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1990#else
1991 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1992#endif
1993}
1994
1995
1996/**
1997 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1998 */
1999IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2000{
2001#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2002 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2003#else
2004 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2005#endif
2006}
2007
2008
2009/*********************************************************************************************************************************
2010* Helpers: Commit, rollback & unmap *
2011*********************************************************************************************************************************/
2012
2013/**
2014 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2015 */
2016IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2017{
2018 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2019}
2020
2021
2022/**
2023 * Used by TB code to commit and unmap a read-write memory mapping.
2024 */
2025IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2026{
2027 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2028}
2029
2030
2031/**
2032 * Used by TB code to commit and unmap a write-only memory mapping.
2033 */
2034IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2035{
2036 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2037}
2038
2039
2040/**
2041 * Used by TB code to commit and unmap a read-only memory mapping.
2042 */
2043IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2044{
2045 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2046}
2047
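/*
 * Note (added, illustrative only): the TB code emitted by the recompiler uses
 * the mapping helpers above and the commit-and-unmap helpers in pairs.  A rough
 * plain-C sketch of a flat 32-bit write, with uGCPtrDst and uValue standing in
 * for values computed by the TB, would be:
 *
 *      uint8_t   bUnmapInfo = 0;
 *      uint32_t *pu32Dst    = iemNativeHlpMemFlatMapDataU32Wo(pVCpu, &bUnmapInfo, uGCPtrDst);
 *      *pu32Dst = uValue;                                   // produce the value
 *      iemNativeHlpMemCommitAndUnmapWo(pVCpu, bUnmapInfo);  // commit & release the mapping
 *
 * The bUnmapInfo byte filled in by the map helper must be passed unchanged to
 * the matching commit-and-unmap helper.
 */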
2048
2049/**
2050 * Reinitializes the native recompiler state.
2051 *
2052 * Called before starting a new recompile job.
2053 */
2054static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2055{
2056 pReNative->cLabels = 0;
2057 pReNative->bmLabelTypes = 0;
2058 pReNative->cFixups = 0;
2059 pReNative->cTbExitFixups = 0;
2060#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2061 pReNative->pDbgInfo->cEntries = 0;
2062 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2063#endif
2064 pReNative->pTbOrg = pTb;
2065 pReNative->cCondDepth = 0;
2066 pReNative->uCondSeqNo = 0;
2067 pReNative->uCheckIrqSeqNo = 0;
2068 pReNative->uTlbSeqNo = 0;
2069#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
2070 pReNative->fSkippingEFlags = 0;
2071#endif
2072#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2073 pReNative->PostponedEfl.fEFlags = 0;
2074 pReNative->PostponedEfl.enmOp = kIemNativePostponedEflOp_Invalid;
2075 pReNative->PostponedEfl.cOpBits = 0;
2076 pReNative->PostponedEfl.idxReg1 = UINT8_MAX;
2077 pReNative->PostponedEfl.idxReg2 = UINT8_MAX;
2078#endif
2079
2080#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2081 pReNative->Core.offPc = 0;
2082# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(VBOX_WITH_STATISTICS)
2083 pReNative->idxInstrPlusOneOfLastPcUpdate = 0;
2084# endif
2085# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2086 pReNative->Core.fDebugPcInitialized = false;
2087# endif
2088#endif
2089#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2090 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2091#endif
2092 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2093#if IEMNATIVE_HST_GREG_COUNT < 32
2094 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2095#endif
2096 ;
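    /* Note (added): pre-marking the fixed registers, and any register numbers
       beyond IEMNATIVE_HST_GREG_COUNT, as allocated here is what keeps the
       register allocator from ever handing them out. */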
2097 pReNative->Core.bmHstRegsWithGstShadow = 0;
2098 pReNative->Core.bmGstRegShadows = 0;
2099#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2100 pReNative->Core.bmGstRegShadowDirty = 0;
2101#endif
2102 pReNative->Core.bmVars = 0;
2103 pReNative->Core.bmStack = 0;
2104 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2105 pReNative->Core.u64ArgVars = UINT64_MAX;
2106
2107 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 23);
2108 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2110 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2111 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2112 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2113 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2114 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2115 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2116 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2117 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2118 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2119 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2120 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2121 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2122 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2123 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2124 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2125 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2126 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2127 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2128 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2129 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2130 pReNative->aidxUniqueLabels[22] = UINT32_MAX;
2131
2132 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2133
2134 /* Full host register reinit: */
2135 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2136 {
2137 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2138 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2139 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2140 }
2141
2142 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2143 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2144#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2145 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2146#endif
2147#ifdef IEMNATIVE_REG_FIXED_TMP0
2148 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2149#endif
2150#ifdef IEMNATIVE_REG_FIXED_TMP1
2151 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2152#endif
2153#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2154 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2155#endif
2156 );
2157 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2158 {
2159 fRegs &= ~RT_BIT_32(idxReg);
2160 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2161 }
2162
2163 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2164#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2165 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2166#endif
2167#ifdef IEMNATIVE_REG_FIXED_TMP0
2168 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2169#endif
2170#ifdef IEMNATIVE_REG_FIXED_TMP1
2171 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2172#endif
2173#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2174 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2175#endif
2176
2177#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2178 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2179# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2180 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2181# endif
2182 ;
2183 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2184 pReNative->Core.bmGstSimdRegShadows = 0;
2185 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2186 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2187
2188 /* Full host register reinit: */
2189 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2190 {
2191 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2192 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2193 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2194 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2195 }
2196
2197 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2198 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2199 {
2200 fRegs &= ~RT_BIT_32(idxReg);
2201 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2202 }
2203
2204# ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2205 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2206# endif
2207
2208#endif
2209
2210 return pReNative;
2211}
2212
2213
2214/**
2215 * Used when done emitting the per-chunk code and for iemNativeInit bailout.
2216 */
2217static void iemNativeTerm(PIEMRECOMPILERSTATE pReNative)
2218{
2219 RTMemFree(pReNative->pInstrBuf);
2220 RTMemFree(pReNative->paLabels);
2221 RTMemFree(pReNative->paFixups);
2222 RTMemFree(pReNative->paTbExitFixups);
2223#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2224 RTMemFree(pReNative->pDbgInfo);
2225#endif
2226 RTMemFree(pReNative);
2227}
2228
2229
2230/**
2231 * Allocates and initializes the native recompiler state.
2232 *
2233 * This is called the first time an EMT wants to recompile something.
2234 *
2235 * @returns Pointer to the new recompiler state.
2236 * @param pVCpu The cross context virtual CPU structure of the calling
2237 * thread.
2238 * @param pTb The TB that's about to be recompiled. When this is NULL,
2239 * the recompiler state is for emitting the common per-chunk
2240 * code from iemNativeRecompileAttachExecMemChunkCtx.
2241 * @thread EMT(pVCpu)
2242 */
2243static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2244{
2245 VMCPU_ASSERT_EMT(pVCpu);
2246
2247 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2248 AssertReturn(pReNative, NULL);
2249
2250 /*
2251 * Try allocate all the buffers and stuff we need.
2252 */
2253 uint32_t const cFactor = pTb ? 1 : 32 /* per-chunk stuff doesn't really need anything but the code buffer */;
2254 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2255 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K / cFactor);
2256 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K / cFactor);
2257 pReNative->paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemAllocZ(sizeof(IEMNATIVEEXITFIXUP) * _8K / cFactor);
2258#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2259 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K / cFactor]));
2260#endif
2261 if (RT_LIKELY( pReNative->pInstrBuf
2262 && pReNative->paLabels
2263 && pReNative->paFixups
2264 && pReNative->paTbExitFixups)
2265#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2266 && pReNative->pDbgInfo
2267#endif
2268 )
2269 {
2270 /*
2271 * Set the buffer & array sizes on success.
2272 */
2273 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2274 pReNative->cLabelsAlloc = _8K / cFactor;
2275 pReNative->cFixupsAlloc = _16K / cFactor;
2276 pReNative->cTbExitFixupsAlloc = _8K / cFactor;
2277#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2278 pReNative->cDbgInfoAlloc = _16K / cFactor;
2279#endif
2280
2281 /* Other constant stuff: */
2282 pReNative->pVCpu = pVCpu;
2283
2284 /*
2285 * Done, just reinit it.
2286 */
2287 return iemNativeReInit(pReNative, pTb);
2288 }
2289
2290 /*
2291 * Failed. Cleanup and return.
2292 */
2293 AssertFailed();
2294 iemNativeTerm(pReNative);
2295 return NULL;
2296}
2297
2298
2299/**
2300 * Creates a label.
2301 *
2302 * If the label does not yet have a defined position,
2303 * call iemNativeLabelDefine() later to set it.
2304 *
2305 * @returns Label ID. Throws VBox status code on failure, so no need to check
2306 * the return value.
2307 * @param pReNative The native recompile state.
2308 * @param enmType The label type.
2309 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2310 * label is not yet defined (default).
2311 * @param uData Data associated with the label. Only applicable to
2312 * certain types of labels. Default is zero.
2313 */
2314DECL_HIDDEN_THROW(uint32_t)
2315iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2316 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2317{
2318 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2319#if defined(RT_ARCH_AMD64)
2320 Assert(enmType >= kIemNativeLabelType_LoopJumpTarget);
2321#endif
2322
2323 /*
2324 * Locate existing label definition.
2325 *
2326 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2327 * and uData is zero.
2328 */
2329 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2330 uint32_t const cLabels = pReNative->cLabels;
2331 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2332#ifndef VBOX_STRICT
2333 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2334 && offWhere == UINT32_MAX
2335 && uData == 0
2336#endif
2337 )
2338 {
2339#ifndef VBOX_STRICT
2340 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2341 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2342 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2343 if (idxLabel < pReNative->cLabels)
2344 return idxLabel;
2345#else
2346 for (uint32_t i = 0; i < cLabels; i++)
2347 if ( paLabels[i].enmType == enmType
2348 && paLabels[i].uData == uData)
2349 {
2350 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2351 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2352 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2353 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2354 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2355 return i;
2356 }
2357 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2358 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2359#endif
2360 }
2361
2362 /*
2363 * Make sure we've got room for another label.
2364 */
2365 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2366 { /* likely */ }
2367 else
2368 {
2369 uint32_t cNew = pReNative->cLabelsAlloc;
2370 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2371 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2372 cNew *= 2;
2373 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2374 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2375 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2376 pReNative->paLabels = paLabels;
2377 pReNative->cLabelsAlloc = cNew;
2378 }
2379
2380 /*
2381 * Define a new label.
2382 */
2383 paLabels[cLabels].off = offWhere;
2384 paLabels[cLabels].enmType = enmType;
2385 paLabels[cLabels].uData = uData;
2386 pReNative->cLabels = cLabels + 1;
2387
2388 Assert((unsigned)enmType < 64);
2389 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2390
2391 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2392 {
2393 Assert(uData == 0);
2394 pReNative->aidxUniqueLabels[enmType] = cLabels;
2395 }
2396
2397 if (offWhere != UINT32_MAX)
2398 {
2399#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2400 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2401 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2402#endif
2403 }
2404 return cLabels;
2405}
2406
2407
2408/**
2409 * Defines the location of an existing label.
2410 *
2411 * @param pReNative The native recompile state.
2412 * @param idxLabel The label to define.
2413 * @param offWhere The position.
2414 */
2415DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2416{
2417 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2418 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2419 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2420 pLabel->off = offWhere;
2421#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2422 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2423 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2424#endif
2425}
2426
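/*
 * Note (added, illustrative sketch): a label is normally created as a forward
 * declaration, referenced from one or more fixups, and only bound to a code
 * offset once that position has been emitted.  Roughly:
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);  // off stays UINT32_MAX for now
 *      ...emit a branch and record it against the label...
 *      iemNativeAddFixup(pReNative, off, idxLabel, enmFixupType);
 *      ...emit more code until the target position is reached...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);                           // bind the label
 *
 * enmLabelType and enmFixupType stand in for concrete IEMNATIVELABELTYPE and
 * IEMNATIVEFIXUPTYPE values; the two-argument iemNativeLabelCreate call relies
 * on the default offWhere/uData arguments indicated in the prototype comments.
 */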
2427
2428/**
2429 * Looks up a label.
2430 *
2431 * @returns Label ID if found, UINT32_MAX if not.
2432 */
2433DECLHIDDEN(uint32_t) iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2434 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
2435{
2436 Assert((unsigned)enmType < 64);
2437 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2438 {
2439 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2440 return pReNative->aidxUniqueLabels[enmType];
2441
2442 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2443 uint32_t const cLabels = pReNative->cLabels;
2444 for (uint32_t i = 0; i < cLabels; i++)
2445 if ( paLabels[i].enmType == enmType
2446 && paLabels[i].uData == uData
2447 && ( paLabels[i].off == offWhere
2448 || offWhere == UINT32_MAX
2449 || paLabels[i].off == UINT32_MAX))
2450 return i;
2451 }
2452 return UINT32_MAX;
2453}
2454
2455
2456/**
2457 * Adds a fixup.
2458 *
2459 * @throws VBox status code (int) on failure.
2460 * @param pReNative The native recompile state.
2461 * @param offWhere The instruction offset of the fixup location.
2462 * @param idxLabel The target label ID for the fixup.
2463 * @param enmType The fixup type.
2464 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2465 */
2466DECL_HIDDEN_THROW(void)
2467iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2468 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2469{
2470 Assert(idxLabel <= UINT16_MAX);
2471 Assert((unsigned)enmType <= UINT8_MAX);
2472#ifdef RT_ARCH_ARM64
2473 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2474 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2475 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2476#endif
2477
2478 /*
2479 * Make sure we've room.
2480 */
2481 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2482 uint32_t const cFixups = pReNative->cFixups;
2483 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2484 { /* likely */ }
2485 else
2486 {
2487 uint32_t cNew = pReNative->cFixupsAlloc;
2488 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2489 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2490 cNew *= 2;
2491 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2492 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2493 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2494 pReNative->paFixups = paFixups;
2495 pReNative->cFixupsAlloc = cNew;
2496 }
2497
2498 /*
2499 * Add the fixup.
2500 */
2501 paFixups[cFixups].off = offWhere;
2502 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2503 paFixups[cFixups].enmType = enmType;
2504 paFixups[cFixups].offAddend = offAddend;
2505 pReNative->cFixups = cFixups + 1;
2506}
2507
2508
2509/**
2510 * Adds a fixup to the per chunk tail code.
2511 *
2512 * @throws VBox status code (int) on failure.
2513 * @param pReNative The native recompile state.
2514 * @param offWhere The instruction offset of the fixup location.
2515 * @param enmExitReason The exit reason to jump to.
2516 */
2517DECL_HIDDEN_THROW(void)
2518iemNativeAddTbExitFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, IEMNATIVELABELTYPE enmExitReason)
2519{
2520 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
2521
2522 /*
2523 * Make sure we've room.
2524 */
2525 PIEMNATIVEEXITFIXUP paTbExitFixups = pReNative->paTbExitFixups;
2526 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
2527 if (RT_LIKELY(cTbExitFixups < pReNative->cTbExitFixupsAlloc))
2528 { /* likely */ }
2529 else
2530 {
2531 uint32_t cNew = pReNative->cTbExitFixupsAlloc;
2532 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2533 AssertStmt(cTbExitFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2534 cNew *= 2;
2535 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2536 paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemRealloc(paTbExitFixups, cNew * sizeof(paTbExitFixups[0]));
2537 AssertStmt(paTbExitFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2538 pReNative->paTbExitFixups = paTbExitFixups;
2539 pReNative->cTbExitFixupsAlloc = cNew;
2540 }
2541
2542 /*
2543 * Add the fixup.
2544 */
2545 paTbExitFixups[cTbExitFixups].off = offWhere;
2546 paTbExitFixups[cTbExitFixups].enmExitReason = enmExitReason;
2547 pReNative->cTbExitFixups = cTbExitFixups + 1;
2548}
2549
2550
2551/**
2552 * Slow code path for iemNativeInstrBufEnsure.
2553 */
2554DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2555{
2556 /* Double the buffer size till we meet the request. */
2557 uint32_t cNew = pReNative->cInstrBufAlloc;
2558 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2559 do
2560 cNew *= 2;
2561 while (cNew < off + cInstrReq);
2562
2563 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2564#ifdef RT_ARCH_ARM64
2565 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2566#else
2567 uint32_t const cbMaxInstrBuf = _2M;
2568#endif
2569 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2570
2571 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2572 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2573
2574#ifdef VBOX_STRICT
2575 pReNative->offInstrBufChecked = off + cInstrReq;
2576#endif
2577 pReNative->cInstrBufAlloc = cNew;
2578 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2579}
2580
2581#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2582
2583/**
2584 * Grows the static debug info array used during recompilation.
2585 *
2586 * @returns Pointer to the new debug info block; throws VBox status code on
2587 * failure, so no need to check the return value.
2588 */
2589DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2590{
2591 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2592 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2593 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2594 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2595 pReNative->pDbgInfo = pDbgInfo;
2596 pReNative->cDbgInfoAlloc = cNew;
2597 return pDbgInfo;
2598}
2599
2600
2601/**
2602 * Adds a new debug info uninitialized entry, returning the pointer to it.
2603 */
2604DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2605{
2606 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2607 { /* likely */ }
2608 else
2609 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2610 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2611}
2612
2613
2614/**
2615 * Debug Info: Adds a native offset record, if necessary.
2616 */
2617DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2618{
2619 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2620
2621 /*
2622 * Do we need this one?
2623 */
2624 uint32_t const offPrev = pDbgInfo->offNativeLast;
2625 if (offPrev == off)
2626 return;
2627 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2628
2629 /*
2630 * Add it.
2631 */
2632 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2633 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2634 pEntry->NativeOffset.offNative = off;
2635 pDbgInfo->offNativeLast = off;
2636}
2637
2638
2639/**
2640 * Debug Info: Record info about a label.
2641 */
2642static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2643{
2644 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2645 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2646 pEntry->Label.uUnused = 0;
2647 pEntry->Label.enmLabel = (uint8_t)enmType;
2648 pEntry->Label.uData = uData;
2649}
2650
2651
2652/**
2653 * Debug Info: Record info about a threaded call.
2654 */
2655static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2656{
2657 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2658 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2659 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2660 pEntry->ThreadedCall.uUnused = 0;
2661 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2662}
2663
2664
2665/**
2666 * Debug Info: Record info about a new guest instruction.
2667 */
2668static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2669{
2670 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2671 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2672 pEntry->GuestInstruction.uUnused = 0;
2673 pEntry->GuestInstruction.fExec = fExec;
2674}
2675
2676
2677/**
2678 * Debug Info: Record info about guest register shadowing.
2679 */
2680DECL_HIDDEN_THROW(void)
2681iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2682 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2683{
2684 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2685 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2686 pEntry->GuestRegShadowing.uUnused = 0;
2687 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2688 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2689 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2690#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2691 Assert( idxHstReg != UINT8_MAX
2692 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2693#endif
2694}
2695
2696
2697# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2698/**
2699 * Debug Info: Record info about guest SIMD register shadowing.
2700 */
2701DECL_HIDDEN_THROW(void)
2702iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2703 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2704{
2705 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2706 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2707 pEntry->GuestSimdRegShadowing.uUnused = 0;
2708 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2709 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2710 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2711}
2712# endif
2713
2714
2715# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2716/**
2717 * Debug Info: Record info about delayed RIP updates.
2718 */
2719DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint64_t offPc, uint32_t cInstrSkipped)
2720{
2721 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2722 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2723 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2724 pEntry->DelayedPcUpdate.offPc = offPc; /** @todo support larger values */
2725}
2726# endif
2727
2728# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2729
2730/**
2731 * Debug Info: Record info about a dirty guest register.
2732 */
2733DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2734 uint8_t idxGstReg, uint8_t idxHstReg)
2735{
2736 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2737 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2738 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2739 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2740 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2741}
2742
2743
2744/**
2745 * Debug Info: Record info about a dirty guest register writeback operation.
2746 */
2747DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2748{
2749 unsigned const cBitsGstRegMask = 25;
2750 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2751
2752 /* The first block of 25 bits: */
2753 if (fGstReg & fGstRegMask)
2754 {
2755 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2756 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2757 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2758 pEntry->GuestRegWriteback.cShift = 0;
2759 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2760 fGstReg &= ~(uint64_t)fGstRegMask;
2761 if (!fGstReg)
2762 return;
2763 }
2764
2765 /* The second block of 25 bits: */
2766 fGstReg >>= cBitsGstRegMask;
2767 if (fGstReg & fGstRegMask)
2768 {
2769 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2770 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2771 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2772 pEntry->GuestRegWriteback.cShift = 1;
2773 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2774 fGstReg &= ~(uint64_t)fGstRegMask;
2775 if (!fGstReg)
2776 return;
2777 }
2778
2779 /* The last block with 14 bits: */
2780 fGstReg >>= cBitsGstRegMask;
2781 Assert(fGstReg & fGstRegMask);
2782 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2783 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2784 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2785 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2786 pEntry->GuestRegWriteback.cShift = 2;
2787 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2788}
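/*
 * Note (added): going by the values written above, cShift records which 25-bit
 * block of the original 64-bit fGstReg mask an entry holds (0, 1 or 2), so a
 * consumer can presumably reassemble the mask as
 *      (uint64_t)pEntry->GuestRegWriteback.fGstReg << (pEntry->GuestRegWriteback.cShift * 25)
 * E.g. a mask with only bit 30 set ends up in the cShift=1 entry with bit 5 set.
 */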
2789
2790# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2791
2792# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2793/**
2794 * Debug Info: Record info about emitting a postponed EFLAGS calculation.
2795 */
2796DECL_HIDDEN_THROW(void)
2797iemNativeDbgInfoAddPostponedEFlagsCalc(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVE_POSTPONED_EFL_OP_T enmOp,
2798 uint8_t cOpBits, uint8_t idxEmit)
2799{
2800 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2801 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2802 pEntry->PostponedEflCalc.uType = kIemTbDbgEntryType_PostponedEFlagsCalc;
2803 pEntry->PostponedEflCalc.enmOp = (unsigned)enmOp;
2804 pEntry->PostponedEflCalc.cOpBits = cOpBits;
2805 pEntry->PostponedEflCalc.idxEmit = idxEmit;
2806 pEntry->PostponedEflCalc.uUnused = 0;
2807}
2808# endif /* IEMNATIVE_WITH_EFLAGS_POSTPONING */
2809
2810#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2811
2812
2813/*********************************************************************************************************************************
2814* Register Allocator *
2815*********************************************************************************************************************************/
2816
2817/**
2818 * Register parameter indexes (indexed by argument number).
2819 */
2820DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2821{
2822 IEMNATIVE_CALL_ARG0_GREG,
2823 IEMNATIVE_CALL_ARG1_GREG,
2824 IEMNATIVE_CALL_ARG2_GREG,
2825 IEMNATIVE_CALL_ARG3_GREG,
2826#if defined(IEMNATIVE_CALL_ARG4_GREG)
2827 IEMNATIVE_CALL_ARG4_GREG,
2828# if defined(IEMNATIVE_CALL_ARG5_GREG)
2829 IEMNATIVE_CALL_ARG5_GREG,
2830# if defined(IEMNATIVE_CALL_ARG6_GREG)
2831 IEMNATIVE_CALL_ARG6_GREG,
2832# if defined(IEMNATIVE_CALL_ARG7_GREG)
2833 IEMNATIVE_CALL_ARG7_GREG,
2834# endif
2835# endif
2836# endif
2837#endif
2838};
2839AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2840
2841/**
2842 * Call register masks indexed by argument count.
2843 */
2844DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2845{
2846 0,
2847 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2848 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2849 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2850 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2851 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2852#if defined(IEMNATIVE_CALL_ARG4_GREG)
2853 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2854 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2855# if defined(IEMNATIVE_CALL_ARG5_GREG)
2856 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2857 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2858# if defined(IEMNATIVE_CALL_ARG6_GREG)
2859 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2860 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2861 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2862# if defined(IEMNATIVE_CALL_ARG7_GREG)
2863 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2864 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2865 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2866# endif
2867# endif
2868# endif
2869#endif
2870};
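/* Usage note (added): g_aidxIemNativeCallRegs[iArg] maps an argument number to
   its host register, while g_afIemNativeCallRegs[cArgs] gives the combined mask
   of the first cArgs argument registers, e.g. g_afIemNativeCallRegs[2] equals
   RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG). */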
2871
2872#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2873/**
2874 * BP offset of the stack argument slots.
2875 *
2876 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2877 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2878 */
2879DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2880{
2881 IEMNATIVE_FP_OFF_STACK_ARG0,
2882# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2883 IEMNATIVE_FP_OFF_STACK_ARG1,
2884# endif
2885# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2886 IEMNATIVE_FP_OFF_STACK_ARG2,
2887# endif
2888# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2889 IEMNATIVE_FP_OFF_STACK_ARG3,
2890# endif
2891};
2892AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2893#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2894
2895/**
2896 * Info about shadowed guest register values.
2897 * @see IEMNATIVEGSTREG
2898 */
2899DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2900{
2901#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2902 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2903 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2904 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2905 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2906 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2907 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2908 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2909 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2910 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2911 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2912 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2913 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2914 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2915 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2916 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2917 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2918 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2919 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2920 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2921 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2922 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2923 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2924 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2925 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2926 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2927 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2928 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2929 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2930 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2931 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2932 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2933 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2934 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2935 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2936 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2937 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2938 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2939 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2940 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2941 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2942 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2943 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2944 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2945 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2946 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2947 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2948 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2949 /* [kIemNativeGstReg_EFlags.Cf] = */ { UINT32_MAX, 0, "efl.cf", },
2950 /* [kIemNativeGstReg_EFlags.Of] = */ { UINT32_MAX, 0, "efl.of", },
2951 /* [kIemNativeGstReg_EFlags.Af] = */ { UINT32_MAX, 0, "efl.af", },
2952 /* [kIemNativeGstReg_EFlags.Zf] = */ { UINT32_MAX, 0, "efl.zf", },
2953 /* [kIemNativeGstReg_EFlags.Sf] = */ { UINT32_MAX, 0, "efl.sf", },
2954 /* [kIemNativeGstReg_EFlags.Pf] = */ { UINT32_MAX, 0, "efl.pf", },
2955 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2956#undef CPUMCTX_OFF_AND_SIZE
2957};
2958AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
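/* Note (added): the efl.* entries above carry UINT32_MAX/0 because the
   individual status flags have no CPUMCTX field of their own; presumably they
   exist only so the postponed EFLAGS machinery has names and shadow slots to
   track, and they must never be flushed through
   iemNativeEmitStoreGprWithGstShadowReg below (which asserts cb != 0). */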
2959
2960
2961/** Host CPU general purpose register names. */
2962DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2963{
2964#ifdef RT_ARCH_AMD64
2965 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2966#elif defined(RT_ARCH_ARM64)
2967 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2968 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2969#else
2970# error "port me"
2971#endif
2972};
2973
2974
2975#if 0 /* unused */
2976/**
2977 * Tries to locate a suitable register in the given register mask.
2978 *
2979 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2980 * failed.
2981 *
2982 * @returns Host register number on success, returns UINT8_MAX on failure.
2983 */
2984static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2985{
2986 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2987 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2988 if (fRegs)
2989 {
2990 /** @todo pick better here: */
2991 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2992
2993 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2994 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2995 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2996 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2997
2998 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2999 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3000 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3001 return idxReg;
3002 }
3003 return UINT8_MAX;
3004}
3005#endif /* unused */
3006
3007#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3008
3009/**
3010 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
3011 *
3012 * @returns New code buffer offset on success; throws VBox status code on failure.
3013 * @param pReNative The native recompile state.
3014 * @param off The current code buffer position.
3015 * @param enmGstReg The guest register to store to.
3016 * @param idxHstReg The host register to store from.
3017 */
3018DECL_FORCE_INLINE_THROW(uint32_t)
3019iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
3020{
3021 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3022 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3023
3024 switch (g_aGstShadowInfo[enmGstReg].cb)
3025 {
3026 case sizeof(uint64_t):
3027 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3028 case sizeof(uint32_t):
3029 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3030 case sizeof(uint16_t):
3031 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3032# if 0 /* not present in the table. */
3033 case sizeof(uint8_t):
3034 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3035# endif
3036 default:
3037 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3038 }
3039}
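/*
 * Illustrative usage sketch (hypothetical caller, assuming the usual pReNative/off/idxHstReg
 * context of the surrounding emitters): the cb field of the table entry picks the store width,
 * so a GPR or RIP shadow is written back with a 64-bit store while e.g. a segment selector
 * shadow would use the 16-bit variant.
 *
 * @code
 *      // idxHstReg is assumed to currently shadow the guest RIP value.
 *      off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, kIemNativeGstReg_Pc, idxHstReg);
 * @endcode
 */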
3040
3041
3042/**
3043 * Emits code to flush a pending write of the given guest register,
3044 * version with alternative core state.
3045 *
3046 * @returns New code buffer offset.
3047 * @param pReNative The native recompile state.
3048 * @param off Current code buffer position.
3049 * @param pCore Alternative core state.
3050 * @param enmGstReg The guest register to flush.
3051 */
3052DECL_HIDDEN_THROW(uint32_t)
3053iemNativeRegFlushPendingWriteEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVECORESTATE pCore, IEMNATIVEGSTREG enmGstReg)
3054{
3055 uint8_t const idxHstReg = pCore->aidxGstRegShadows[enmGstReg];
3056
3057 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3058 && enmGstReg <= kIemNativeGstReg_GprLast)
3059 || enmGstReg == kIemNativeGstReg_MxCsr);
3060 Assert( idxHstReg != UINT8_MAX
3061 && pCore->bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3062 Log12(("iemNativeRegFlushPendingWriteEx: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3063 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3064
3065 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3066
3067 pCore->bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3068 return off;
3069}
3070
3071
3072/**
3073 * Emits code to flush a pending write of the given guest register.
3074 *
3075 * @returns New code buffer offset.
3076 * @param pReNative The native recompile state.
3077 * @param off Current code buffer position.
3078 * @param enmGstReg The guest register to flush.
3079 */
3080DECL_HIDDEN_THROW(uint32_t)
3081iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
3082{
3083 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3084
3085 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3086 && enmGstReg <= kIemNativeGstReg_GprLast)
3087 || enmGstReg == kIemNativeGstReg_MxCsr);
3088 Assert( idxHstReg != UINT8_MAX
3089 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3090 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3091 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3092
3093 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3094
3095 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3096 return off;
3097}
3098
3099
3100/**
3101 * Flush the given set of guest registers if marked as dirty.
3102 *
3103 * @returns New code buffer offset.
3104 * @param pReNative The native recompile state.
3105 * @param off Current code buffer position.
3106 * @param fFlushGstReg The guest register set to flush (default is flush everything).
3107 * @note Must not modify the host status flags!
3108 */
3109DECL_HIDDEN_THROW(uint32_t)
3110iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
3111{
3112 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
3113 if (bmGstRegShadowDirty)
3114 {
3115# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3116 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3117 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
3118# endif
3119 do
3120 {
3121 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3122 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3123 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3124 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3125 } while (bmGstRegShadowDirty);
3126 }
3127
3128 return off;
3129}
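/*
 * Illustrative usage sketch (hypothetical caller, assuming the usual pReNative/off context):
 * flushing only the dirty GPR shadows before running code that inspects the guest context
 * directly, versus flushing everything by passing UINT64_MAX.
 *
 * @code
 *      // Flush the dirty shadows of the guest GPRs only.
 *      uint64_t const fGprMask = (RT_BIT_64(kIemNativeGstReg_GprLast + 1) - 1)
 *                              & ~(RT_BIT_64(kIemNativeGstReg_GprFirst) - 1);
 *      off = iemNativeRegFlushDirtyGuest(pReNative, off, fGprMask);
 *
 *      // ... or flush every dirty shadow:
 *      off = iemNativeRegFlushDirtyGuest(pReNative, off, UINT64_MAX);
 * @endcode
 */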
3130
3131
3132/**
3133 * Flush all shadowed guest registers marked as dirty for the given host register.
3134 *
3135 * @returns New code buffer offset.
3136 * @param pReNative The native recompile state.
3137 * @param off Current code buffer position.
3138 * @param idxHstReg The host register.
3139 *
3140 * @note This doesn't do any unshadowing of guest registers from the host register.
3141 *
3142 * @note Must not modify the host status flags!
3143 */
3144DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3145{
3146 /* We need to flush any pending guest register writes this host register shadows. */
3147 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3148 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3149 {
3150# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3151 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3152 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3153# endif
3154 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3155 do
3156 {
3157 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3158 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3159 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3160 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3161 } while (bmGstRegShadowDirty);
3162 }
3163
3164 return off;
3165}
3166
3167#endif /* IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK */
3168
3169
3170/**
3171 * Locate a register, possibly freeing one up.
3172 *
3173 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3174 * failed.
3175 *
3176 * @returns Host register number on success. Returns UINT8_MAX if no register is
3177 * found; the caller is supposed to deal with this and raise an
3178 * allocation type specific status code (if desired).
3179 *
3180 * @throws VBox status code if we run into trouble spilling a variable or
3181 * recording debug info. Does NOT throw anything if we're out of
3182 * registers, though.
3183 *
3184 * @note Must not modify the host status flags!
3185 */
3186static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3187 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3188{
3189 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3190 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3191 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3192
3193 /*
3194 * Try a freed register that's shadowing a guest register.
3195 */
3196 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3197 if (fRegs)
3198 {
3199 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3200
3201#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3202 /*
3203 * When we have liveness information, we use it to kick out all shadowed
3204 * guest registers that will not be needed any more in this TB. If we're
3205 * lucky, this may prevent us from ending up here again.
3206 *
3207 * Note! We must consider the previous entry here so we don't free
3208 * anything that the current threaded function requires (current
3209 * entry is produced by the next threaded function).
3210 */
3211 uint32_t const idxCurCall = pReNative->idxCurCall;
3212 if (idxCurCall > 0)
3213 {
3214 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3215 uint64_t fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
3216
3217 /* Merge EFLAGS. */
3218 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3219 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3220 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3221 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3222 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3223
3224 /* If it matches any shadowed registers. */
3225 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3226 {
3227#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3228 /* Writeback any dirty shadow registers we are about to unshadow. */
3229 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3230#endif
3231
3232 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3233 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3234 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3235
3236 /* See if we've got any unshadowed registers we can return now. */
3237 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3238 if (fUnshadowedRegs)
3239 {
3240 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3241 return (fPreferVolatile
3242 ? ASMBitFirstSetU32(fUnshadowedRegs)
3243 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3244 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3245 - 1;
3246 }
3247 }
3248 }
3249#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3250
3251 unsigned const idxReg = (fPreferVolatile
3252 ? ASMBitFirstSetU32(fRegs)
3253 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3254 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3255 - 1;
3256
3257 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3258 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3259 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3260 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3261
3262#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3263 /* We need to flush any pending guest register writes this host register shadows. */
3264 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3265#endif
3266
3267 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3268 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3269 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3270 return idxReg;
3271 }
3272
3273 /*
3274 * Try free up a variable that's in a register.
3275 *
3276 * We do two rounds here, first evacuating variables that don't need to be
3277 * saved on the stack, then in the second round moving things to the stack.
3278 */
3279 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3280 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3281 {
3282 uint32_t fVars = pReNative->Core.bmVars;
3283 while (fVars)
3284 {
3285 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3286 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3287#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3288 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
3289 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit first so the loop can advance. */
3290#endif
3291
3292 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3293 && (RT_BIT_32(idxReg) & fRegMask)
3294 && ( iLoop == 0
3295 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3296 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3297 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3298 {
3299 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3300 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3301 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3302 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3303 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3304 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3305#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3306 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3307#endif
3308
3309 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3310 {
3311 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3312 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3313 }
3314
3315 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3316 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3317
3318 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3319 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3320 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3321 return idxReg;
3322 }
3323 fVars &= ~RT_BIT_32(idxVar);
3324 }
3325 }
3326
3327 return UINT8_MAX;
3328}
3329
3330
3331/**
3332 * Reassigns a variable to a different register specified by the caller.
3333 *
3334 * @returns The new code buffer position.
3335 * @param pReNative The native recompile state.
3336 * @param off The current code buffer position.
3337 * @param idxVar The variable index.
3338 * @param idxRegOld The old host register number.
3339 * @param idxRegNew The new host register number.
3340 * @param pszCaller The caller for logging.
3341 */
3342static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3343 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3344{
3345 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3346 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3347#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3348 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3349#endif
3350 RT_NOREF(pszCaller);
3351
3352#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3353 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3354#endif
3355 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3356
3357 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3358#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3359 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3360#endif
3361 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3362 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3363 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3364
3365 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3366 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3367 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3368 if (fGstRegShadows)
3369 {
3370 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3371 | RT_BIT_32(idxRegNew);
3372 while (fGstRegShadows)
3373 {
3374 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3375 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3376
3377 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3378 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3379 }
3380 }
3381
3382 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3383 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3384 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3385 return off;
3386}
3387
3388
3389/**
3390 * Moves a variable to a different register or spills it onto the stack.
3391 *
3392 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3393 * kinds can easily be recreated if needed later.
3394 *
3395 * @returns The new code buffer position.
3396 * @param pReNative The native recompile state.
3397 * @param off The current code buffer position.
3398 * @param idxVar The variable index.
3399 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3400 * call-volatile registers.
3401 */
3402DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3403 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3404{
3405 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3406 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3407 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3408 Assert(!pVar->fRegAcquired);
3409
3410 uint8_t const idxRegOld = pVar->idxReg;
3411 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3412 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3413 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3414 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3415 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3416 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3417 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3418 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3419#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3420 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3421#endif
3422
3423
3424 /** @todo Add statistics on this.*/
3425 /** @todo Implement basic variable liveness analysis (python) so variables
3426 * can be freed immediately once no longer used. Without this we risk
3427 * trashing registers and stack for dead variables.
3428 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3429
3430 /*
3431 * First try move it to a different register, as that's cheaper.
3432 */
3433 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3434 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3435 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3436 if (fRegs)
3437 {
3438 /* Avoid using shadow registers, if possible. */
3439 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3440 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3441 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3442 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3443 }
3444
3445 /*
3446 * Otherwise we must spill the register onto the stack.
3447 */
3448 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3449 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3450 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3451 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3452
3453 pVar->idxReg = UINT8_MAX;
3454 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3455 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3456 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3457 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3458 return off;
3459}
3460
3461
3462/**
3463 * Allocates a temporary host general purpose register.
3464 *
3465 * This may emit code to save register content onto the stack in order to free
3466 * up a register.
3467 *
3468 * @returns The host register number; throws VBox status code on failure,
3469 * so no need to check the return value.
3470 * @param pReNative The native recompile state.
3471 * @param poff Pointer to the variable with the code buffer position.
3472 * This will be update if we need to move a variable from
3473 * register to stack in order to satisfy the request.
3474 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3475 * registers (@c true, default) or the other way around
3476 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3477 *
3478 * @note Must not modify the host status flags!
3479 */
3480DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3481{
3482 /*
3483 * Try find a completely unused register, preferably a call-volatile one.
3484 */
3485 uint8_t idxReg;
3486 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3487 & ~pReNative->Core.bmHstRegsWithGstShadow
3488 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3489 if (fRegs)
3490 {
3491 if (fPreferVolatile)
3492 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3493 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3494 else
3495 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3496 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3497 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3498 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3499 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3500 }
3501 else
3502 {
3503 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3504 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3505 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3506 }
3507 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3508}
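/*
 * Illustrative usage sketch (hypothetical caller; the immediate and the emitted code in
 * between are placeholders): a scratch register is allocated, used and then released so
 * it becomes available again for subsequent allocations.
 *
 * @code
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1234)); // placeholder
 *      // ... emit whatever needs the scratch register here ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 * @endcode
 */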
3509
3510
3511/**
3512 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3513 * registers.
3514 *
3515 * @returns The host register number; throws VBox status code on failure,
3516 * so no need to check the return value.
3517 * @param pReNative The native recompile state.
3518 * @param poff Pointer to the variable with the code buffer position.
3519 * This will be updated if we need to move a variable from
3520 * register to stack in order to satisfy the request.
3521 * @param fRegMask Mask of acceptable registers.
3522 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3523 * registers (@c true, default) or the other way around
3524 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3525 */
3526DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3527 bool fPreferVolatile /*= true*/)
3528{
3529 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3530 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3531
3532 /*
3533 * Try to find a completely unused register, preferably a call-volatile one.
3534 */
3535 uint8_t idxReg;
3536 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3537 & ~pReNative->Core.bmHstRegsWithGstShadow
3538 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3539 & fRegMask;
3540 if (fRegs)
3541 {
3542 if (fPreferVolatile)
3543 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3544 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3545 else
3546 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3547 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3548 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3549 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3550 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3551 }
3552 else
3553 {
3554 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3555 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3556 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3557 }
3558 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3559}
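/*
 * Illustrative usage sketch (hypothetical caller): restricting the allocation to
 * call-preserved host registers so the value survives a helper call; the exact mask is
 * an assumption made for illustration.
 *
 * @code
 *      uint8_t const idxSafeReg = iemNativeRegAllocTmpEx(pReNative, &off,
 *                                                        IEMNATIVE_HST_GREG_MASK
 *                                                        & ~IEMNATIVE_REG_FIXED_MASK
 *                                                        & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
 *                                                        false); // fPreferVolatile=false
 *      // ... emit a helper call here; idxSafeReg is not clobbered by it ...
 *      iemNativeRegFreeTmp(pReNative, idxSafeReg);
 * @endcode
 */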
3560
3561
3562/**
3563 * Allocates a temporary register for loading an immediate value into.
3564 *
3565 * This will emit code to load the immediate, unless there happens to be an
3566 * unused register with the value already loaded.
3567 *
3568 * The caller must not modify the returned register; it must be considered
3569 * read-only. Free it using iemNativeRegFreeTmpImm.
3570 *
3571 * @returns The host register number; throws VBox status code on failure, so no
3572 * need to check the return value.
3573 * @param pReNative The native recompile state.
3574 * @param poff Pointer to the variable with the code buffer position.
3575 * @param uImm The immediate value that the register must hold upon
3576 * return.
3577 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3578 * registers (@c true, default) or the other way around
3579 * (@c false).
3580 *
3581 * @note Reusing immediate values has not been implemented yet.
3582 */
3583DECL_HIDDEN_THROW(uint8_t)
3584iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3585{
3586 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3587 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3588 return idxReg;
3589}
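/*
 * Illustrative usage sketch (hypothetical caller; the constant is a placeholder): the
 * returned register must be treated as read-only and released with the matching
 * iemNativeRegFreeTmpImm call.
 *
 * @code
 *      uint8_t const idxConstReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
 *      // ... emit code that only reads idxConstReg ...
 *      iemNativeRegFreeTmpImm(pReNative, idxConstReg);
 * @endcode
 */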
3590
3591
3592/**
3593 * Common worker for iemNativeRegAllocTmpForGuestReg() and
3594 * iemNativeRegAllocTmpForGuestEFlags().
3595 *
3596 * See iemNativeRegAllocTmpForGuestRegInt() for details.
3597 */
3598template<IEMNATIVEGSTREGUSE const a_enmIntendedUse, uint32_t const a_fRegMask>
3599static uint8_t iemNativeRegAllocTmpForGuestRegCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3600{
3601 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3602#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3603 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3604#endif
3605
3606 /*
3607 * First check if the guest register value is already in a host register.
3608 */
3609 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3610 {
3611 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3612 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3613 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3614 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3615
3616 /* It's not supposed to be allocated... */
3617 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3618 {
3619 /*
3620 * If the register will trash the guest shadow copy, try find a
3621 * completely unused register we can use instead. If that fails,
3622 * we need to disassociate the host reg from the guest reg.
3623 */
3624 /** @todo would be nice to know if preserving the register is in any way helpful. */
3625 /* If the purpose is calculations, try duplicate the register value as
3626 we'll be clobbering the shadow. */
3627 if ( a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3628 && ( ~pReNative->Core.bmHstRegs
3629 & ~pReNative->Core.bmHstRegsWithGstShadow
3630 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3631 {
3632 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, a_fRegMask);
3633
3634 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3635
3636 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3637 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3638 g_apszIemNativeHstRegNames[idxRegNew]));
3639 idxReg = idxRegNew;
3640 }
3641 /* If the current register matches the restrictions, go ahead and allocate
3642 it for the caller. */
3643 else if (a_fRegMask & RT_BIT_32(idxReg))
3644 {
3645 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3646 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3647 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3648 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3649 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n", g_apszIemNativeHstRegNames[idxReg],
3650 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3651 else
3652 {
3653 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3654 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3655 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3656 }
3657 }
3658 /* Otherwise, allocate a register that satisfies the caller and transfer
3659 the shadowing if compatible with the intended use. (This basically
3660 means the call wants a non-volatile register (RSP push/pop scenario).) */
3661 else
3662 {
3663 Assert(!(a_fRegMask & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
3664 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, a_fRegMask & ~RT_BIT_32(idxReg),
3665 (a_fRegMask & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3666 && a_enmIntendedUse == kIemNativeGstRegUse_Calculation);
3667 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3668 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3669 {
3670 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3671 Log12(("iemNativeRegAllocTmpForGuestReg: Transfering %s to %s for guest %s %s\n",
3672 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3673 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3674 }
3675 else
3676 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3677 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3678 g_apszIemNativeHstRegNames[idxRegNew]));
3679 idxReg = idxRegNew;
3680 }
3681 }
3682 else
3683 {
3684 /*
3685 * Oops. Shadowed guest register already allocated!
3686 *
3687 * Allocate a new register, copy the value and, if updating, transfer
3688 * the guest shadow copy assignment to the new register.
3689 */
3690 AssertMsg( a_enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3691 && a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3692 ("This shouldn't happen: idxReg=%d enmGstReg=%d a_enmIntendedUse=%s\n",
3693 idxReg, enmGstReg, s_pszIntendedUse[a_enmIntendedUse]));
3694
3695 /** @todo share register for readonly access. */
3696 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, a_fRegMask,
3697 a_enmIntendedUse == kIemNativeGstRegUse_Calculation);
3698
3699 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3700 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3701
3702 if RT_CONSTEXPR_IF( a_enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3703 && a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3704 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3705 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3706 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[a_enmIntendedUse]));
3707 else
3708 {
3709 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3710 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3711 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3712 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[a_enmIntendedUse]));
3713 }
3714 idxReg = idxRegNew;
3715 }
3716 Assert(RT_BIT_32(idxReg) & a_fRegMask); /* See assumption in fNoVolatileRegs docs. */
3717
3718#ifdef VBOX_STRICT
3719 /* Strict builds: Check that the value is correct. */
3720 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3721#endif
3722
3723#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3724 /** @todo r=aeichner Implement for registers other than GPR as well. */
3725 if RT_CONSTEXPR_IF( a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3726 || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3727 if ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3728 && enmGstReg <= kIemNativeGstReg_GprLast)
3729 || enmGstReg == kIemNativeGstReg_MxCsr)
3730 {
3731# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3732 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3733 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3734# endif
3735 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3736 }
3737#endif
3738
3739 return idxReg;
3740 }
3741
3742 /*
3743 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3744 */
3745 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, a_fRegMask,
3746 a_enmIntendedUse == kIemNativeGstRegUse_Calculation);
3747
3748 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3749 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3750
3751 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3752 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3753 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3754 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3755
3756#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3757 /** @todo r=aeichner Implement for registers other than GPR as well. */
3758 if RT_CONSTEXPR_IF( a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3759 || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3760 if ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3761 && enmGstReg <= kIemNativeGstReg_GprLast)
3762 || enmGstReg == kIemNativeGstReg_MxCsr)
3763 {
3764# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3765 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3766 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3767# endif
3768 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3769 }
3770#endif
3771
3772 return idxRegNew;
3773}
3774
3775
3776/**
3777 * Allocates a temporary host general purpose register for keeping a guest
3778 * register value.
3779 *
3780 * Since we may already have a register holding the guest register value,
3781 * code will be emitted to do the loading if that's not the case. Code may also
3782 * be emitted if we have to free up a register to satisfy the request.
3783 *
3784 * @returns The host register number; throws VBox status code on failure, so no
3785 * need to check the return value.
3786 * @param pReNative The native recompile state.
3787 * @param poff Pointer to the variable with the code buffer
3788 * position. This will be updated if we need to move a
3789 * variable from register to stack in order to satisfy
3790 * the request.
3791 * @param enmGstReg The guest register that is to be updated.
3792 * @param a_enmIntendedUse How the caller will be using the host register.
3793 * @param a_fNonVolatileRegs Set if no volatile registers are allowed, clear if any
3794 * register is okay (default). The ASSUMPTION here is
3795 * that the caller has already flushed all volatile
3796 * registers, so this is only applied if we allocate a
3797 * new register.
3798 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3799 */
3800template<IEMNATIVEGSTREGUSE const a_enmIntendedUse, bool const a_fNonVolatileRegs>
3801DECL_FORCE_INLINE_THROW(uint8_t)
3802iemNativeRegAllocTmpForGuestRegInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3803{
3804#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3805 AssertMsg( pReNative->idxCurCall == 0
3806 || enmGstReg == kIemNativeGstReg_Pc
3807 || (a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3808 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3809 : a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3810 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3811 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3812 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3813#endif
3814
3815 if RT_CONSTEXPR_IF(!a_fNonVolatileRegs)
3816 return iemNativeRegAllocTmpForGuestRegCommon<a_enmIntendedUse,
3817 IEMNATIVE_HST_GREG_MASK
3818 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, enmGstReg);
3819 else /* keep else, is required by MSC */
3820 return iemNativeRegAllocTmpForGuestRegCommon<a_enmIntendedUse,
3821 IEMNATIVE_HST_GREG_MASK
3822 & ~IEMNATIVE_REG_FIXED_MASK
3823 & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK>(pReNative, poff, enmGstReg);
3824}
3825
3826/* Variants including volatile registers: */
3827
3828DECL_HIDDEN_THROW(uint8_t)
3829iemNativeRegAllocTmpForGuestRegReadOnly(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3830{
3831 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ReadOnly, false>(pReNative, poff, enmGstReg);
3832}
3833
3834DECL_HIDDEN_THROW(uint8_t)
3835iemNativeRegAllocTmpForGuestRegUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3836{
3837 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForUpdate, false>(pReNative, poff, enmGstReg);
3838}
3839
3840DECL_HIDDEN_THROW(uint8_t)
3841iemNativeRegAllocTmpForGuestRegFullWrite(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3842{
3843 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForFullWrite, false>(pReNative, poff, enmGstReg);
3844}
3845
3846DECL_HIDDEN_THROW(uint8_t)
3847iemNativeRegAllocTmpForGuestRegCalculation(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3848{
3849 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_Calculation, false>(pReNative, poff, enmGstReg);
3850}
3851
3852/* Variants excluding any volatile registers: */
3853
3854DECL_HIDDEN_THROW(uint8_t)
3855iemNativeRegAllocTmpForGuestRegReadOnlyNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3856{
3857 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ReadOnly, true>(pReNative, poff, enmGstReg);
3858}
3859
3860DECL_HIDDEN_THROW(uint8_t)
3861iemNativeRegAllocTmpForGuestRegUpdateNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3862{
3863 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForUpdate, true>(pReNative, poff, enmGstReg);
3864}
3865
3866DECL_HIDDEN_THROW(uint8_t)
3867iemNativeRegAllocTmpForGuestRegFullWriteNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3868{
3869 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForFullWrite, true>(pReNative, poff, enmGstReg);
3870}
3871
3872DECL_HIDDEN_THROW(uint8_t)
3873iemNativeRegAllocTmpForGuestRegCalculationNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3874{
3875 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_Calculation, true>(pReNative, poff, enmGstReg);
3876}
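/*
 * Illustrative usage sketch (hypothetical caller): fetching a host register that shadows
 * guest RAX for a read-modify-write operation.  The enum arithmetic used to name the guest
 * GPR is an assumption for illustration; use whatever constant or helper the surrounding
 * code employs for this.
 *
 * @code
 *      IEMNATIVEGSTREG const enmGstRax = (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX);
 *      uint8_t const idxRegRax = iemNativeRegAllocTmpForGuestRegUpdate(pReNative, &off, enmGstRax);
 *      // ... emit code modifying idxRegRax; with delayed writeback the shadow is marked
 *      //     dirty here and flushed later by iemNativeRegFlushDirtyGuest ...
 *      iemNativeRegFreeTmp(pReNative, idxRegRax);
 * @endcode
 */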
3877
3878
3879
3880#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
3881/**
3882 * Specialized version of iemNativeRegAllocTmpForGuestReg for EFLAGS.
3883 *
3884 * This takes additional arguments for covering liveness assertions in strict
3885 * builds, it's otherwise the same as iemNativeRegAllocTmpForGuestReg() with
3886 * kIemNativeGstReg_EFlags as argument.
3887 */
3888template<IEMNATIVEGSTREGUSE const a_enmIntendedUse>
3889DECL_FORCE_INLINE_THROW(uint8_t)
3890iemNativeRegAllocTmpForGuestEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t fRead,
3891 uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3892{
3893 if (pReNative->idxCurCall != 0 && (fRead || fWrite /*|| fPotentialCall*/))
3894 {
3895 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3896 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3897 Assert(!(fPotentialCall & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3898 uint64_t const fAll = fRead | fWrite /*| fPotentialCall*/;
3899 uint32_t fState;
3900# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
3901 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
3902 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
3903 || ( fRead & RT_BIT_64(a_enmGstEfl) \
3904 ? fWrite & RT_BIT_64(a_enmGstEfl) \
3905 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
3906 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
3907 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
3908 ) \
3909 , ("%s - %u\n", #a_enmGstEfl, fState))
3910 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
3911 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
3912 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
3913 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
3914 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
3915 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
3916 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
3917# undef MY_ASSERT_ONE_EFL
3918 }
3919 RT_NOREF(fPotentialCall);
3920
3921 AssertCompile(a_enmIntendedUse == kIemNativeGstRegUse_ReadOnly || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate);
3922 if RT_CONSTEXPR_IF(a_enmIntendedUse == kIemNativeGstRegUse_ReadOnly)
3923 return iemNativeRegAllocTmpForGuestRegCommon<kIemNativeGstRegUse_ReadOnly,
3924 IEMNATIVE_CALL_VOLATILE_GREG_MASK
3925 & IEMNATIVE_HST_GREG_MASK
3926 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, kIemNativeGstReg_EFlags);
3927 else /* keep else, is required by MSC */
3928 return iemNativeRegAllocTmpForGuestRegCommon<kIemNativeGstRegUse_ForUpdate,
3929 IEMNATIVE_CALL_VOLATILE_GREG_MASK
3930 & IEMNATIVE_HST_GREG_MASK
3931 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, kIemNativeGstReg_EFlags);
3932}
3933
3934
3935DECL_HIDDEN_THROW(uint8_t)
3936iemNativeRegAllocTmpForGuestEFlagsReadOnly(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
3937 uint64_t fRead, uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3938{
3939 return iemNativeRegAllocTmpForGuestEFlags<kIemNativeGstRegUse_ReadOnly>(pReNative, poff, fRead, fWrite, fPotentialCall);
3940}
3941
3942DECL_HIDDEN_THROW(uint8_t)
3943iemNativeRegAllocTmpForGuestEFlagsForUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t fRead,
3944 uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3945{
3946 return iemNativeRegAllocTmpForGuestEFlags<kIemNativeGstRegUse_ForUpdate>(pReNative, poff, fRead, fWrite, fPotentialCall);
3947}
3948
3949#endif
3950
3951
3952
3953/**
3954 * Common worker for iemNativeRegAllocTmpForGuestRegIfAlreadyPresent and
3955 * iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent.
3956 *
3957 * See iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() for details.
3958 */
3959DECL_FORCE_INLINE(uint8_t)
3960iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3961{
3962 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3963
3964 /*
3965 * First check if the guest register value is already in a host register.
3966 */
3967 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3968 {
3969 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3970 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3971 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3972 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3973
3974 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3975 {
3976 /*
3977 * We only do readonly use here, so easy compared to the other
3978 * variant of this code.
3979 */
3980 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3981 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3982 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3983 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3984 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3985
3986#ifdef VBOX_STRICT
3987 /* Strict builds: Check that the value is correct. */
3988 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3989#else
3990 RT_NOREF(poff);
3991#endif
3992 return idxReg;
3993 }
3994 }
3995
3996 return UINT8_MAX;
3997}
3998
3999
4000/**
4001 * Allocates a temporary host general purpose register that already holds the
4002 * given guest register value.
4003 *
4004 * The use case for this function is places where the shadowing state cannot be
4005 * modified due to branching and such. This will fail if the we don't have a
4006 * current shadow copy handy or if it's incompatible. The only code that will
4007 * be emitted here is value checking code in strict builds.
4008 *
4009 * The intended use can only be readonly!
4010 *
4011 * @returns The host register number, UINT8_MAX if not present.
4012 * @param pReNative The native recompile state.
4013 * @param poff Pointer to the instruction buffer offset.
4014 * Will be updated in strict builds if a register is
4015 * found.
4016 * @param enmGstReg The guest register that is to be used.
4017 * @note In strict builds, this may throw instruction buffer growth failures.
4018 * Non-strict builds will not throw anything.
4019 * @sa iemNativeRegAllocTmpForGuestReg
4020 */
4021DECL_HIDDEN_THROW(uint8_t)
4022iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4023{
4024#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4025 AssertMsg( pReNative->idxCurCall == 0
4026 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4027 || enmGstReg == kIemNativeGstReg_Pc
4028 , ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4029#endif
4030 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, enmGstReg);
4031}
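/*
 * Illustrative usage sketch (hypothetical caller): unlike the other allocators this one
 * never emits loading code, so the UINT8_MAX "not present" return must be handled.
 *
 * @code
 *      uint8_t const idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
 *                                                                                kIemNativeGstReg_EFlags);
 *      if (idxRegEfl != UINT8_MAX)
 *      {
 *          // ... read-only use of the shadowed EFLAGS value ...
 *          iemNativeRegFreeTmp(pReNative, idxRegEfl);
 *      }
 *      else
 *      {
 *          // ... fall back to a path that does not rely on an existing shadow copy ...
 *      }
 * @endcode
 */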
4032
4033
4034#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
4035/**
4036 * Specialized version of iemNativeRegAllocTmpForGuestRegIfAlreadyPresent for
4037 * EFLAGS.
4038 *
4039 * This takes additional arguments for covering liveness assertions in strict
4040 * builds, it's otherwise the same as
4041 * iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() with
4042 * kIemNativeGstReg_EFlags as argument.
4043 *
4044 * @note The @a fWrite parameter is necessary to complete the liveness picture,
4045 * as iemNativeEmitFetchEFlags() may fetch flags in prep for a later
4046 * commit. If the operation clobbers all the flags, @a fRead will be
4047 * zero, so better verify the whole picture while we're here.
4048 */
4049DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
4050 uint64_t fRead, uint64_t fWrite /*=0*/)
4051{
4052 if (pReNative->idxCurCall != 0)
4053 {
4054 Assert(fRead | fWrite);
4055 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
4056 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
4057 uint64_t const fAll = fRead | fWrite;
4058 uint32_t fState;
4059# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
4060 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
4061 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
4062 || ( fRead & RT_BIT_64(a_enmGstEfl) \
4063 ? fWrite & RT_BIT_64(a_enmGstEfl) \
4064 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
4065 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
4066 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
4067 ) \
4068 , ("%s - %u\n", #a_enmGstEfl, fState))
4069 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
4070 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
4071 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
4072 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
4073 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
4074 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
4075 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
4076# undef MY_ASSERT_ONE_EFL
4077 }
4078 RT_NOREF(fRead);
4079 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, kIemNativeGstReg_EFlags);
4080}
4081#endif
4082
4083
4084/**
4085 * Allocates argument registers for a function call.
4086 *
4087 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4088 * need to check the return value.
4089 * @param pReNative The native recompile state.
4090 * @param off The current code buffer offset.
4091 * @param cArgs The number of arguments the function call takes.
4092 */
4093DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4094{
4095 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4096 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4097 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4098 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4099
4100 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4101 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4102 else if (cArgs == 0)
4103 return off;
4104
4105 /*
4106 * Do we get lucky and all registers are free and not shadowing anything?
4107 */
4108 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4109 for (uint32_t i = 0; i < cArgs; i++)
4110 {
4111 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4112 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4113 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4114 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4115 }
4116 /*
4117 * Okay, not lucky so we have to free up the registers.
4118 */
4119 else
4120 for (uint32_t i = 0; i < cArgs; i++)
4121 {
4122 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4123 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4124 {
4125 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4126 {
4127 case kIemNativeWhat_Var:
4128 {
4129 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4130 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4131 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4132 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4133 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4134#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4135 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4136#endif
4137
4138 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4139 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4140 else
4141 {
4142 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4143 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4144 }
4145 break;
4146 }
4147
4148 case kIemNativeWhat_Tmp:
4149 case kIemNativeWhat_Arg:
4150 case kIemNativeWhat_rc:
4151 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4152 default:
4153 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4154 }
4155
4156 }
4157 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4158 {
4159 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4160 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4161 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4162#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4163 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4164#endif
4165 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4166 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4167 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4168 }
4169 else
4170 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4171 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4172 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4173 }
4174 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4175 return off;
4176}
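/*
 * Illustrative usage sketch (hypothetical caller; the argument values and count are
 * placeholders): the argument registers are claimed first, then loaded, before the actual
 * call is emitted elsewhere in the recompiler.
 *
 * @code
 *      off = iemNativeRegAllocArgs(pReNative, off, 2);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0); // placeholder argument
 *      // ... emit the actual helper call ...
 * @endcode
 */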
4177
4178
4179DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4180
4181
4182#if 0
4183/**
4184 * Frees a register assignment of any type.
4185 *
4186 * @param pReNative The native recompile state.
4187 * @param idxHstReg The register to free.
4188 *
4189 * @note Does not update variables.
4190 */
4191DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4192{
4193 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4194 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4195 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4196 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4197 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4198 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4199 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4200 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4201 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4202 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4203 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4204 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4205 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4206 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4207
4208 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4209 /* no flushing, right:
4210 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4211 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4212 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4213 */
4214}
4215#endif
4216
4217
4218/**
4219 * Frees a temporary register.
4220 *
4221 * Any shadow copies of guest registers assigned to the host register will not
4222 * be flushed by this operation.
4223 */
4224DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4225{
4226 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4227 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4228 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4229 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4230 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4231}
4232
4233
4234/**
4235 * Frees a temporary immediate register.
4236 *
4237 * It is assumed that the caller has not modified the register, so it still holds
4238 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4239 */
4240DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4241{
4242 iemNativeRegFreeTmp(pReNative, idxHstReg);
4243}
4244
4245
4246/**
4247 * Frees a register assigned to a variable.
4248 *
4249 * The register will be disassociated from the variable.
4250 */
4251DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4252{
4253 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4254 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4255 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4256 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4257 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4258#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4259 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4260#endif
4261
4262 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4263 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4264 if (!fFlushShadows)
4265 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4266 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4267 else
4268 {
4269 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4270 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4271#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4272 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
4273#endif
4274 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4275 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4276 uint64_t fGstRegShadows = fGstRegShadowsOld;
4277 while (fGstRegShadows)
4278 {
4279 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4280 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4281
4282 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4283 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4284 }
4285 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4286 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4287 }
4288}
4289
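/* Hedged sketch (not from the original source): the fFlushShadows parameter of
   iemNativeRegFreeVar decides whether the guest shadows survive the release:
        iemNativeRegFreeVar(pReNative, idxHstReg, false);   keeps shadow copies
        iemNativeRegFreeVar(pReNative, idxHstReg, true);    also unshadows them
   Pass true when the register content is about to become stale. */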
4290
4291#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4292# if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
4293/** Host CPU SIMD register names. */
4294DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4295{
4296# ifdef RT_ARCH_AMD64
4297 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4298# elif defined(RT_ARCH_ARM64)
4299 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4300 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4301# else
4302# error "port me"
4303# endif
4304};
4305# endif
4306
4307
4308/**
4309 * Frees a SIMD register assigned to a variable.
4310 *
4311 * The register will be disassociated from the variable.
4312 */
4313DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4314{
4315 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4316 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4317 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4318 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4319 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4320 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4321
4322 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4323 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4324 if (!fFlushShadows)
4325 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4326 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4327 else
4328 {
4329 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4330 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4331 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4332 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4333 uint64_t fGstRegShadows = fGstRegShadowsOld;
4334 while (fGstRegShadows)
4335 {
4336 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4337 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4338
4339 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4340 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4341 }
4342 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4343 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4344 }
4345}
4346
4347
4348/**
4349 * Reassigns a variable to a different SIMD register specified by the caller.
4350 *
4351 * @returns The new code buffer position.
4352 * @param pReNative The native recompile state.
4353 * @param off The current code buffer position.
4354 * @param idxVar The variable index.
4355 * @param idxRegOld The old host register number.
4356 * @param idxRegNew The new host register number.
4357 * @param pszCaller The caller for logging.
4358 */
4359static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4360 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4361{
4362 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4363 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4364 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4365 RT_NOREF(pszCaller);
4366
4367 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4368 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4369 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4370
4371 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4372 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4373 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4374
4375 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4376 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4378
4379 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4380 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4381 else
4382 {
4383 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4384 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4385 }
4386
4387 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4388 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4389 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
4390 if (fGstRegShadows)
4391 {
4392 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4393 | RT_BIT_32(idxRegNew);
4394 while (fGstRegShadows)
4395 {
4396 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4397 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4398
4399 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4400 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4401 }
4402 }
4403
4404 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4405 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4406 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4407 return off;
4408}
4409
4410
4411/**
4412 * Moves a variable to a different register or spills it onto the stack.
4413 *
4414 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4415 * kinds can easily be recreated if needed later.
4416 *
4417 * @returns The new code buffer position.
4418 * @param pReNative The native recompile state.
4419 * @param off The current code buffer position.
4420 * @param idxVar The variable index.
4421 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4422 * call-volatile registers.
4423 */
4424DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4425 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4426{
4427 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4428 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4429 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4430 Assert(!pVar->fRegAcquired);
4431 Assert(!pVar->fSimdReg);
4432
4433 uint8_t const idxRegOld = pVar->idxReg;
4434 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4435 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4436 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4437 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4438 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4439 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4440 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4441 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4442 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4443 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4444
4445 /** @todo Add statistics on this.*/
4446 /** @todo Implement basic variable liveness analysis (python) so variables
4447 * can be freed immediately once no longer used. Without it we risk
4448 * trashing registers and stack for dead variables.
4449 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4450
4451 /*
4452 * First try move it to a different register, as that's cheaper.
4453 */
4454 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4455 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4456 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4457 if (fRegs)
4458 {
4459 /* Avoid using shadow registers, if possible. */
4460 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4461 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4462 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4463 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4464 }
4465
4466 /*
4467 * Otherwise we must spill the register onto the stack.
4468 */
4469 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4470 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4471 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4472
4473 if (pVar->cbVar == sizeof(RTUINT128U))
4474 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4475 else
4476 {
4477 Assert(pVar->cbVar == sizeof(RTUINT256U));
4478 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4479 }
4480
4481 pVar->idxReg = UINT8_MAX;
4482 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4483 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4484 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4485 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4486 return off;
4487}
4488
4489
4490/**
4491 * Called right before emitting a call instruction to move anything important
4492 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
4493 * optionally freeing argument variables.
4494 *
4495 * @returns New code buffer offset, UINT32_MAX on failure.
4496 * @param pReNative The native recompile state.
4497 * @param off The code buffer offset.
4498 * @param cArgs The number of arguments the function call takes.
4499 * It is presumed that the host register part of these has
4500 * been allocated as such already and won't need moving,
4501 * just freeing.
4502 * @param fKeepVars Mask of variables that should keep their register
4503 * assignments. Caller must take care to handle these.
4504 */
4505DECL_HIDDEN_THROW(uint32_t)
4506iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4507{
4508 Assert(!cArgs); RT_NOREF(cArgs);
4509
4510 /* fKeepVars will reduce this mask. */
4511 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4512
4513 /*
4514 * Move anything important out of volatile registers.
4515 */
4516 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4517#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4518 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4519#endif
4520 ;
4521
4522 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4523 if (!fSimdRegsToMove)
4524 { /* likely */ }
4525 else
4526 {
4527 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4528 while (fSimdRegsToMove != 0)
4529 {
4530 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4531 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4532
4533 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4534 {
4535 case kIemNativeWhat_Var:
4536 {
4537 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4538 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4539 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4540 Assert(pVar->idxReg == idxSimdReg);
4541 Assert(pVar->fSimdReg);
4542 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4543 {
4544 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4545 idxVar, pVar->enmKind, pVar->idxReg));
4546 if (pVar->enmKind != kIemNativeVarKind_Stack)
4547 pVar->idxReg = UINT8_MAX;
4548 else
4549 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4550 }
4551 else
4552 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4553 continue;
4554 }
4555
4556 case kIemNativeWhat_Arg:
4557 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4558 continue;
4559
4560 case kIemNativeWhat_rc:
4561 case kIemNativeWhat_Tmp:
4562 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4563 continue;
4564
4565 case kIemNativeWhat_FixedReserved:
4566#ifdef RT_ARCH_ARM64
4567 continue; /* On ARM the upper half of the virtual 256-bit register. */
4568#endif
4569
4570 case kIemNativeWhat_FixedTmp:
4571 case kIemNativeWhat_pVCpuFixed:
4572 case kIemNativeWhat_pCtxFixed:
4573 case kIemNativeWhat_PcShadow:
4574 case kIemNativeWhat_Invalid:
4575 case kIemNativeWhat_End:
4576 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4577 }
4578 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4579 }
4580 }
4581
4582 /*
4583 * Do the actual freeing.
4584 */
4585 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4586 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4587 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4588 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4589
4590 /* If there are guest register shadows in any call-volatile register, we
4591 have to clear the corresponding guest register masks for each register. */
4592 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4593 if (fHstSimdRegsWithGstShadow)
4594 {
4595 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4596 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4597 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4598 do
4599 {
4600 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4601 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4602
4603 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4604
4605#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4606 /*
4607 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4608 * to call volatile registers).
4609 */
4610 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4611 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4612 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4613#endif
4614 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4615 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4616
4617 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4618 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4619 } while (fHstSimdRegsWithGstShadow != 0);
4620 }
4621
4622 return off;
4623}
4624#endif
4625
4626
4627/**
4628 * Called right before emitting a call instruction to move anything important
4629 * out of call-volatile registers, free and flush the call-volatile registers,
4630 * optionally freeing argument variables.
4631 *
4632 * @returns New code buffer offset, UINT32_MAX on failure.
4633 * @param pReNative The native recompile state.
4634 * @param off The code buffer offset.
4635 * @param cArgs The number of arguments the function call takes.
4636 * It is presumed that the host register part of these has
4637 * been allocated as such already and won't need moving,
4638 * just freeing.
4639 * @param fKeepVars Mask of variables that should keep their register
4640 * assignments. Caller must take care to handle these.
4641 */
4642DECL_HIDDEN_THROW(uint32_t)
4643iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4644{
4645 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4646
4647 /* fKeepVars will reduce this mask. */
4648 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK;
4649
4650#ifdef RT_ARCH_ARM64
4651AssertCompile(IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK == UINT32_C(0x37fff));
4652#endif
4653
4654 /*
4655 * Move anything important out of volatile registers.
4656 */
4657 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4658 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4659 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK
4660#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4661 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4662#endif
4663 & ~g_afIemNativeCallRegs[cArgs];
4664
4665 fRegsToMove &= pReNative->Core.bmHstRegs;
4666 if (!fRegsToMove)
4667 { /* likely */ }
4668 else
4669 {
4670 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4671 while (fRegsToMove != 0)
4672 {
4673 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4674 fRegsToMove &= ~RT_BIT_32(idxReg);
4675
4676 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4677 {
4678 case kIemNativeWhat_Var:
4679 {
4680 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4681 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4682 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4683 Assert(pVar->idxReg == idxReg);
4684#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4685 Assert(!pVar->fSimdReg);
4686#endif
4687 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4688 {
4689 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4690 idxVar, pVar->enmKind, pVar->idxReg));
4691 if (pVar->enmKind != kIemNativeVarKind_Stack)
4692 pVar->idxReg = UINT8_MAX;
4693 else
4694 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4695 }
4696 else
4697 fRegsToFree &= ~RT_BIT_32(idxReg);
4698 continue;
4699 }
4700
4701 case kIemNativeWhat_Arg:
4702 AssertMsgFailed(("What?!?: %u\n", idxReg));
4703 continue;
4704
4705 case kIemNativeWhat_rc:
4706 case kIemNativeWhat_Tmp:
4707 AssertMsgFailed(("Missing free: %u\n", idxReg));
4708 continue;
4709
4710 case kIemNativeWhat_FixedTmp:
4711 case kIemNativeWhat_pVCpuFixed:
4712 case kIemNativeWhat_pCtxFixed:
4713 case kIemNativeWhat_PcShadow:
4714 case kIemNativeWhat_FixedReserved:
4715 case kIemNativeWhat_Invalid:
4716 case kIemNativeWhat_End:
4717 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4718 }
4719 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4720 }
4721 }
4722
4723 /*
4724 * Do the actual freeing.
4725 */
4726 if (pReNative->Core.bmHstRegs & fRegsToFree)
4727 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4728 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4729 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4730
4731 /* If there are guest register shadows in any call-volatile register, we
4732 have to clear the corresponding guest register masks for each register. */
4733 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4734 if (fHstRegsWithGstShadow)
4735 {
4736 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4737 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
4738 fHstRegsWithGstShadow));
4739 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4740 do
4741 {
4742 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4743 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4744
4745 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4746
4747#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4748 /*
4749 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4750 * to call volatile registers).
4751 */
4752 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4753 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4754 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4755#endif
4756
4757 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4758 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4759 } while (fHstRegsWithGstShadow != 0);
4760 }
4761
4762#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4763 /* Now for the SIMD registers, no argument support for now. */
4764 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4765#endif
4766
4767 return off;
4768}
4769
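/* Hedged sketch of the intended call-emission sequence; pfnSomeHelper is a
   placeholder and the ARG0 load is only an example of argument setup:
        off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 1);
        off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeHelper);
   Any guest values needed afterwards must be re-fetched, as the shadows were
   flushed above. */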
4770
4771/**
4772 * Flushes a set of guest register shadow copies.
4773 *
4774 * This is usually done after calling a threaded function or a C-implementation
4775 * of an instruction.
4776 *
4777 * @param pReNative The native recompile state.
4778 * @param fGstRegs Set of guest registers to flush.
4779 */
4780DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4781{
4782 /*
4783 * Reduce the mask by what's currently shadowed
4784 */
4785 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4786 fGstRegs &= bmGstRegShadowsOld;
4787 if (fGstRegs)
4788 {
4789 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4790 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4791 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4792 if (bmGstRegShadowsNew)
4793 {
4794 /*
4795 * Partial.
4796 */
4797 do
4798 {
4799 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4800 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4801 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4802 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4803 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4804#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4805 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4806#endif
4807
4808 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4809 fGstRegs &= ~fInThisHstReg;
4810 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4811 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4812 if (!fGstRegShadowsNew)
4813 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4814 } while (fGstRegs != 0);
4815 }
4816 else
4817 {
4818 /*
4819 * Clear all.
4820 */
4821 do
4822 {
4823 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4824 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4825 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4826 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4827 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4828#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4829 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4830#endif
4831
4832 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4833 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4834 } while (fGstRegs != 0);
4835 pReNative->Core.bmHstRegsWithGstShadow = 0;
4836 }
4837 }
4838}
4839
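/* Hedged usage sketch: after emitting a call to a threaded function or a C
   implementation that may have changed guest state, drop the now stale shadow
   copies, either wholesale or per register (the PC mask is illustrative):
        iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
        iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));
*/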
4840
4841/**
4842 * Flushes guest register shadow copies held by a set of host registers.
4843 *
4844 * This is used with the TLB lookup code for ensuring that we don't carry on
4845 * with any guest shadows in volatile registers, as these will get corrupted by
4846 * a TLB miss.
4847 *
4848 * @param pReNative The native recompile state.
4849 * @param fHstRegs Set of host registers to flush guest shadows for.
4850 */
4851DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4852{
4853 /*
4854 * Reduce the mask by what's currently shadowed.
4855 */
4856 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4857 fHstRegs &= bmHstRegsWithGstShadowOld;
4858 if (fHstRegs)
4859 {
4860 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4861 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4862 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4863 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4864 if (bmHstRegsWithGstShadowNew)
4865 {
4866 /*
4867 * Partial (likely).
4868 */
4869 uint64_t fGstShadows = 0;
4870 do
4871 {
4872 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4873 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4874 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4875 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4876#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4877 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4878#endif
4879
4880 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4881 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4882 fHstRegs &= ~RT_BIT_32(idxHstReg);
4883 } while (fHstRegs != 0);
4884 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4885 }
4886 else
4887 {
4888 /*
4889 * Clear all.
4890 */
4891 do
4892 {
4893 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4894 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4895 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4896 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4897#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4898 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4899#endif
4900
4901 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4902 fHstRegs &= ~RT_BIT_32(idxHstReg);
4903 } while (fHstRegs != 0);
4904 pReNative->Core.bmGstRegShadows = 0;
4905 }
4906 }
4907}
4908
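/* Hedged sketch of the TLB-lookup use described above: before the inline
   lookup, make sure no guest shadows live in registers the miss path clobbers:
        iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
*/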
4909
4910/**
4911 * Restores guest shadow copies in volatile registers.
4912 *
4913 * This is used after calling a helper function (think TLB miss) to restore the
4914 * register state of volatile registers.
4915 *
4916 * @param pReNative The native recompile state.
4917 * @param off The code buffer offset.
4918 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4919 * be active (allocated) w/o asserting. Hack.
4920 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4921 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4922 */
4923DECL_HIDDEN_THROW(uint32_t)
4924iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4925{
4926 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4927 if (fHstRegs)
4928 {
4929 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4930 do
4931 {
4932 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4933
4934 /* It's not fatal if a register is active holding a variable that
4935 shadows a guest register, ASSUMING all pending guest register
4936 writes were flushed prior to the helper call. However, we'll be
4937 emitting duplicate restores, so it wastes code space. */
4938 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4939 RT_NOREF(fHstRegsActiveShadows);
4940
4941 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4942#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4943 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4944#endif
4945 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4946 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4947 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4948
4949 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4950 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4951
4952 fHstRegs &= ~RT_BIT_32(idxHstReg);
4953 } while (fHstRegs != 0);
4954 }
4955 return off;
4956}
4957
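/* Hedged sketch of a helper-call slow path using this; pfnTlbMissHelper is a
   placeholder and 0 means no host register is expected to stay active:
        off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnTlbMissHelper);
        off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
*/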
4958
4959
4960
4961/*********************************************************************************************************************************
4962* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4963*********************************************************************************************************************************/
4964#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4965
4966/**
4967 * Info about shadowed guest SIMD register values.
4968 * @see IEMNATIVEGSTSIMDREG
4969 */
4970static struct
4971{
4972 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4973 uint32_t offXmm;
4974 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4975 uint32_t offYmm;
4976 /** Name (for logging). */
4977 const char *pszName;
4978} const g_aGstSimdShadowInfo[] =
4979{
4980#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4981 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4982 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4983 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4984 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4985 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4986 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4987 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4988 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4989 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4990 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4991 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4992 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4993 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4994 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4995 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4996 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4997 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4998#undef CPUMCTX_OFF_AND_SIZE
4999};
5000AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5001
5002
5003/**
5004 * Frees a temporary SIMD register.
5005 *
5006 * Any shadow copies of guest registers assigned to the host register will not
5007 * be flushed by this operation.
5008 */
5009DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5010{
5011 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5012 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5013 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5014 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5015 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5016}
5017
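/* Illustrative pairing with the SIMD allocator defined further down in this
   section (sketch only):
        uint8_t const idxTmpSimdReg = iemNativeSimdRegAllocTmp(pReNative, &off);
        ...emit vector code using idxTmpSimdReg...
        iemNativeSimdRegFreeTmp(pReNative, idxTmpSimdReg);
*/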
5018
5019/**
5020 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
5021 *
5022 * @returns New code buffer offset.
5023 * @param pReNative The native recompile state.
5024 * @param off Current code buffer position.
5025 * @param enmGstSimdReg The guest SIMD register to flush.
5026 */
5027DECL_HIDDEN_THROW(uint32_t)
5028iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5029{
5030 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5031
5032 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5033 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5034 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5035 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5036
5037 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5038 {
5039 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5040 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5041 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5042 }
5043
5044 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5045 {
5046 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5047 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5048 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5049 }
5050
5051 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5052 return off;
5053}
5054
5055
5056/**
5057 * Flush the given set of guest SIMD registers if marked as dirty.
5058 *
5059 * @returns New code buffer offset.
5060 * @param pReNative The native recompile state.
5061 * @param off Current code buffer position.
5062 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
5063 */
5064DECL_HIDDEN_THROW(uint32_t)
5065iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
5066{
5067 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5068 & fFlushGstSimdReg;
5069 if (bmGstSimdRegShadowDirty)
5070 {
5071# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5072 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5073 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5074# endif
5075
5076 do
5077 {
5078 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5079 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5080 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5081 } while (bmGstSimdRegShadowDirty);
5082 }
5083
5084 return off;
5085}
5086
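/* Hedged usage sketch: flush every dirty shadowed SIMD register before code
   that reads the CPUMCTX copies directly, or restrict it with a mask (the
   single-register mask is illustrative):
        off = iemNativeSimdRegFlushDirtyGuest(pReNative, off);
        off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, RT_BIT_64(kIemNativeGstSimdReg_SimdRegFirst));
*/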
5087
5088#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5089/**
5090 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
5091 *
5092 * @returns New code buffer offset.
5093 * @param pReNative The native recompile state.
5094 * @param off Current code buffer position.
5095 * @param idxHstSimdReg The host SIMD register.
5096 *
5097 * @note This doesn't do any unshadowing of guest registers from the host register.
5098 */
5099DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
5100{
5101 /* We need to flush any pending guest register writes this host register shadows. */
5102 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5103 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5104 if (bmGstSimdRegShadowDirty)
5105 {
5106# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5107 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5108 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5109# endif
5110
5111 do
5112 {
5113 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5114 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5115 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5116 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5117 } while (bmGstSimdRegShadowDirty);
5118 }
5119
5120 return off;
5121}
5122#endif
5123
5124
5125/**
5126 * Locate a register, possibly freeing one up.
5127 *
5128 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5129 * failed.
5130 *
5131 * @returns Host register number on success. Returns UINT8_MAX if no registers
5132 * found; the caller is supposed to deal with this and raise an
5133 * allocation-type-specific status code (if desired).
5134 *
5135 * @throws VBox status code if we run into trouble spilling a variable or
5136 * recording debug info. Does NOT throw anything if we're out of
5137 * registers, though.
5138 */
5139static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5140 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5141{
5142 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5143 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5144 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5145
5146 /*
5147 * Try a freed register that's shadowing a guest register.
5148 */
5149 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5150 if (fRegs)
5151 {
5152 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5153
5154#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5155 /*
5156 * When we have liveness information, we use it to kick out all shadowed
5157 * guest registers that will not be needed any more in this TB. If we're
5158 * lucky, this may prevent us from ending up here again.
5159 *
5160 * Note! We must consider the previous entry here so we don't free
5161 * anything that the current threaded function requires (current
5162 * entry is produced by the next threaded function).
5163 */
5164 uint32_t const idxCurCall = pReNative->idxCurCall;
5165 if (idxCurCall > 0)
5166 {
5167 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5168 uint64_t const fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
5169
5170 /* If it matches any shadowed registers. */
5171 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5172 {
5173 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5174 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5175 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5176
5177 /* See if we've got any unshadowed registers we can return now. */
5178 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5179 if (fUnshadowedRegs)
5180 {
5181 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
5182 return (fPreferVolatile
5183 ? ASMBitFirstSetU32(fUnshadowedRegs)
5184 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5185 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5186 - 1;
5187 }
5188 }
5189 }
5190#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5191
5192 unsigned const idxReg = (fPreferVolatile
5193 ? ASMBitFirstSetU32(fRegs)
5194 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5195 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5196 - 1;
5197
5198 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5199 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5200 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5201 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5202
5203 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5204 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
5205
5206 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5207 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5208 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5209 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5210 return idxReg;
5211 }
5212
5213 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5214
5215 /*
5216 * Try free up a variable that's in a register.
5217 *
5218 * We do two rounds here, first evacuating variables we don't need to be
5219 * saved on the stack, then in the second round moving things to the stack.
5220 */
5221 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5222 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5223 {
5224 uint32_t fVars = pReNative->Core.bmVars;
5225 while (fVars)
5226 {
5227 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5228 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5229 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
5230 { fVars &= ~RT_BIT_32(idxVar); continue; }
5231
5232 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5233 && (RT_BIT_32(idxReg) & fRegMask)
5234 && ( iLoop == 0
5235 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5236 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5237 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5238 {
5239 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5240 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5241 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5242 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5243 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5244 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5245
5246 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5247 {
5248 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5249 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5250 }
5251
5252 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5253 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5254
5255 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5256 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5257 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5258 return idxReg;
5259 }
5260 fVars &= ~RT_BIT_32(idxVar);
5261 }
5262 }
5263
5264 AssertFailed();
5265 return UINT8_MAX;
5266}
5267
5268
5269/**
5270 * Flushes a set of guest register shadow copies.
5271 *
5272 * This is usually done after calling a threaded function or a C-implementation
5273 * of an instruction.
5274 *
5275 * @param pReNative The native recompile state.
5276 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5277 */
5278DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5279{
5280 /*
5281 * Reduce the mask by what's currently shadowed
5282 */
5283 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5284 fGstSimdRegs &= bmGstSimdRegShadows;
5285 if (fGstSimdRegs)
5286 {
5287 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5288 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5289 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5290 if (bmGstSimdRegShadowsNew)
5291 {
5292 /*
5293 * Partial.
5294 */
5295 do
5296 {
5297 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5298 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5299 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5300 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5301 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5302 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5303
5304 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5305 fGstSimdRegs &= ~fInThisHstReg;
5306 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5307 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5308 if (!fGstRegShadowsNew)
5309 {
5310 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5311 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5312 }
5313 } while (fGstSimdRegs != 0);
5314 }
5315 else
5316 {
5317 /*
5318 * Clear all.
5319 */
5320 do
5321 {
5322 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5323 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5324 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5325 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5326 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5327 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5328
5329 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5330 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5331 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5332 } while (fGstSimdRegs != 0);
5333 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5334 }
5335 }
5336}
5337
5338
5339/**
5340 * Allocates a temporary host SIMD register.
5341 *
5342 * This may emit code to save register content onto the stack in order to free
5343 * up a register.
5344 *
5345 * @returns The host register number; throws VBox status code on failure,
5346 * so no need to check the return value.
5347 * @param pReNative The native recompile state.
5348 * @param poff Pointer to the variable with the code buffer position.
5349 * This will be updated if we need to move a variable from
5350 * register to stack in order to satisfy the request.
5351 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5352 * registers (@c true, default) or the other way around
5353 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5354 */
5355DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5356{
5357 /*
5358 * Try find a completely unused register, preferably a call-volatile one.
5359 */
5360 uint8_t idxSimdReg;
5361 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5362 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5363 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5364 if (fRegs)
5365 {
5366 if (fPreferVolatile)
5367 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5368 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5369 else
5370 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5371 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5372 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5373 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5374
5375 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5376 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5377 }
5378 else
5379 {
5380 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5381 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5382 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5383 }
5384
5385 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5386 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5387}
5388
5389
5390/**
5391 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5392 * registers.
5393 *
5394 * @returns The host register number; throws VBox status code on failure,
5395 * so no need to check the return value.
5396 * @param pReNative The native recompile state.
5397 * @param poff Pointer to the variable with the code buffer position.
5398 * This will be updated if we need to move a variable from
5399 * register to stack in order to satisfy the request.
5400 * @param fRegMask Mask of acceptable registers.
5401 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5402 * registers (@c true, default) or the other way around
5403 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5404 */
5405DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5406 bool fPreferVolatile /*= true*/)
5407{
5408 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5409 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5410
5411 /*
5412 * Try find a completely unused register, preferably a call-volatile one.
5413 */
5414 uint8_t idxSimdReg;
5415 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5416 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5417 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5418 & fRegMask;
5419 if (fRegs)
5420 {
5421 if (fPreferVolatile)
5422 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5423 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5424 else
5425 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5426 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5427 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5428 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5429
5430 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5431 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5432 }
5433 else
5434 {
5435 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5436 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5437 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5438 }
5439
5440 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5441 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5442}
5443
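/* Hedged sketch: restricting the allocation to non-volatile SIMD registers,
   using the same mask construction as iemNativeSimdRegAllocTmpForGuestSimdReg
   further down:
        uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpEx(pReNative, &off,
                                                              IEMNATIVE_HST_SIMD_REG_MASK
                                                              & ~IEMNATIVE_SIMD_REG_FIXED_MASK
                                                              & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
*/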
5444
5445/**
5446 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5447 *
5448 * @param pReNative The native recompile state.
5449 * @param idxHstSimdReg The host SIMD register to update the state for.
5450 * @param enmLoadSz The load size to set.
5451 */
5452DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5453 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5454{
5455 /* Everything valid already? -> nothing to do. */
5456 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5457 return;
5458
5459 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5460 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5461 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5462 {
5463 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5464 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5465 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5466 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5467 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5468 }
5469}
5470
5471
5472static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5473 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5474{
5475 /* Easy case first: either the destination loads the same range as what the source has already loaded, or the source has loaded everything. */
5476 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5477 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5478 {
5479# ifdef RT_ARCH_ARM64
5480 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5481 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5482# endif
5483
5484 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5485 {
5486 switch (enmLoadSzDst)
5487 {
5488 case kIemNativeGstSimdRegLdStSz_256:
5489 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5490 break;
5491 case kIemNativeGstSimdRegLdStSz_Low128:
5492 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5493 break;
5494 case kIemNativeGstSimdRegLdStSz_High128:
5495 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5496 break;
5497 default:
5498 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5499 }
5500
5501 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5502 }
5503 }
5504 else
5505 {
5506 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5507 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5508 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5509 }
5510
5511 return off;
5512}
5513
5514
5515/**
5516 * Allocates a temporary host SIMD register for keeping a guest
5517 * SIMD register value.
5518 *
5519 * Since we may already have a register holding the guest register value,
5520 * code will be emitted to do the loading if that's not the case. Code may also
5521 * be emitted if we have to free up a register to satisfy the request.
5522 *
5523 * @returns The host register number; throws VBox status code on failure, so no
5524 * need to check the return value.
5525 * @param pReNative The native recompile state.
5526 * @param poff Pointer to the variable with the code buffer
5527 * position. This will be updated if we need to move a
5528 * variable from register to stack in order to satisfy
5529 * the request.
5530 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5531 * @param enmIntendedUse How the caller will be using the host register.
5532 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5533 * register is okay (default). The ASSUMPTION here is
5534 * that the caller has already flushed all volatile
5535 * registers, so this is only applied if we allocate a
5536 * new register.
5537 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5538 */
5539DECL_HIDDEN_THROW(uint8_t)
5540iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5541 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5542 bool fNoVolatileRegs /*= false*/)
5543{
5544 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5545#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5546 AssertMsg( pReNative->idxCurCall == 0
5547 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5548 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5549 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5550 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5551 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5552 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5553#endif
5554#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5555 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5556#endif
5557 uint32_t const fRegMask = !fNoVolatileRegs
5558 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5559 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5560
5561 /*
5562 * First check if the guest register value is already in a host register.
5563 */
5564 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5565 {
5566 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5567 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5568 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5569 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5570
5571 /* It's not supposed to be allocated... */
5572 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5573 {
5574 /*
5575             * If the register will trash the guest shadow copy, try to find a
5576 * completely unused register we can use instead. If that fails,
5577 * we need to disassociate the host reg from the guest reg.
5578 */
5579 /** @todo would be nice to know if preserving the register is in any way helpful. */
5580            /* If the purpose is calculations, try duplicating the register value as
5581 we'll be clobbering the shadow. */
5582 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5583 && ( ~pReNative->Core.bmHstSimdRegs
5584 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5585 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5586 {
5587 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5588
5589 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5590
5591 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5592 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5593 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5594 idxSimdReg = idxRegNew;
5595 }
5596 /* If the current register matches the restrictions, go ahead and allocate
5597 it for the caller. */
5598 else if (fRegMask & RT_BIT_32(idxSimdReg))
5599 {
5600 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5601 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5602 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5603 {
5604 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5605 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5606 else
5607 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5608 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5609 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5610 }
5611 else
5612 {
5613 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5614 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5615 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5616 }
5617 }
5618 /* Otherwise, allocate a register that satisfies the caller and transfer
5619 the shadowing if compatible with the intended use. (This basically
5620               means the caller wants a non-volatile register (RSP push/pop scenario).) */
5621 else
5622 {
5623 Assert(fNoVolatileRegs);
5624 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5625 !fNoVolatileRegs
5626 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5627 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5628 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5629 {
5630 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5631                Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5632 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5633 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5634 }
5635 else
5636 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5637 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5638 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5639 idxSimdReg = idxRegNew;
5640 }
5641 }
5642 else
5643 {
5644 /*
5645 * Oops. Shadowed guest register already allocated!
5646 *
5647 * Allocate a new register, copy the value and, if updating, the
5648 * guest shadow copy assignment to the new register.
5649 */
5650 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5651 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5652 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5653 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5654
5655 /** @todo share register for readonly access. */
5656 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5657 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5658
5659 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5660 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5661 else
5662 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5663
5664 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5665 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5666 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5667 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5668 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5669 else
5670 {
5671 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5672 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5673 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5674 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5675 }
5676 idxSimdReg = idxRegNew;
5677 }
5678 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5679
5680#ifdef VBOX_STRICT
5681 /* Strict builds: Check that the value is correct. */
5682 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5683 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5684#endif
5685
5686 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5687 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5688 {
5689# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5690 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5691 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5692# endif
5693
5694 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5695 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5696 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5697 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5698 else
5699 {
5700 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5701 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5702 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5703 }
5704 }
5705
5706 return idxSimdReg;
5707 }
5708
5709 /*
5710     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5711 */
5712 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5713
5714 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5715 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5716 else
5717 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5718
5719 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5720 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5721
5722 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5723 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5724 {
5725# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5726 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5727 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5728# endif
5729
5730 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5731 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5732 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5733 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5734 else
5735 {
5736 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5737 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5738 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5739 }
5740 }
5741
5742    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5743 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5744
5745 return idxRegNew;
5746}
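/*
 * Illustrative sketch (comment only, not compiled): a typical caller pattern for the
 * allocator above, based on the signatures in this file.  The free routine named
 * below is assumed to be the SIMD counterpart of iemNativeRegFreeTmp.
 *
 *     uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, enmGstSimdReg,
 *                                                                        kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                        kIemNativeGstRegUse_ForUpdate);
 *     ...emit instructions reading and modifying idxSimdReg...
 *     iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
 */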
5747
5748
5749/**
5750 * Flushes guest SIMD register shadow copies held by a set of host registers.
5751 *
5752 * This is used whenever calling an external helper for ensuring that we don't carry on
5753 * with any guest shadows in volatile registers, as these will get corrupted by the caller.
5754 *
5755 * @param pReNative The native recompile state.
5756 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5757 */
5758DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5759{
5760 /*
5761 * Reduce the mask by what's currently shadowed.
5762 */
5763 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5764 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5765 if (fHstSimdRegs)
5766 {
5767 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5768 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5769 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5770 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5771 if (bmHstSimdRegsWithGstShadowNew)
5772 {
5773 /*
5774 * Partial (likely).
5775 */
5776 uint64_t fGstShadows = 0;
5777 do
5778 {
5779 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5780 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5781 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5782 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5783 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5784 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5785
5786 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5787 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5788 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5789 } while (fHstSimdRegs != 0);
5790 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5791 }
5792 else
5793 {
5794 /*
5795 * Clear all.
5796 */
5797 do
5798 {
5799 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5800 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5801 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5802 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5803 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5804 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5805
5806 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5807 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5808 } while (fHstSimdRegs != 0);
5809 pReNative->Core.bmGstSimdRegShadows = 0;
5810 }
5811 }
5812}
5813#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
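/*
 * Illustrative sketch (comment only, not compiled): before emitting a call to an
 * external helper, the caller would typically drop the guest shadows held in
 * volatile host SIMD registers, e.g.:
 *
 *     iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 */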
5814
5815
5816
5817/*********************************************************************************************************************************
5818* Code emitters for flushing pending guest register writes and sanity checks *
5819*********************************************************************************************************************************/
5820
5821#ifdef VBOX_STRICT
5822/**
5823 * Does internal register allocator sanity checks.
5824 */
5825DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5826{
5827 /*
5828 * Iterate host registers building a guest shadowing set.
5829 */
5830 uint64_t bmGstRegShadows = 0;
5831 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5832 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5833 while (bmHstRegsWithGstShadow)
5834 {
5835 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5836 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5837 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5838
5839 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5840 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5841 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5842 bmGstRegShadows |= fThisGstRegShadows;
5843 while (fThisGstRegShadows)
5844 {
5845 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5846 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5847 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5848 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5849 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5850 }
5851 }
5852 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5853 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5854 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5855
5856 /*
5857 * Now the other way around, checking the guest to host index array.
5858 */
5859 bmHstRegsWithGstShadow = 0;
5860 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5861 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5862 while (bmGstRegShadows)
5863 {
5864 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5865 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5866 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5867
5868 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5869 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5870 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5871 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5872 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5873 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5874 }
5875 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5876 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5877 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5878}
5879#endif /* VBOX_STRICT */
5880
5881
5882/**
5883 * Flushes any delayed guest register writes.
5884 *
5885 * This must be called prior to calling CImpl functions and any helpers that use
5886 * the guest state (like raising exceptions) and such.
5887 *
5888 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5889 * the caller if it wishes to do so.
5890 */
5891DECL_HIDDEN_THROW(uint32_t)
5892iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5893{
5894#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5895 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5896 off = iemNativeEmitPcWriteback(pReNative, off);
5897#else
5898 RT_NOREF(pReNative, fGstShwExcept);
5899#endif
5900
5901#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5902 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5903#endif
5904
5905#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5906 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5907#endif
5908
5909 return off;
5910}
5911
5912#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5913
5914# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5915
5916/**
5917 * Checks if the value in @a idxPcReg matches IEMCPU::uPcUpdatingDebug.
5918 */
5919DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheckWithReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxPcReg)
5920{
5921 Assert(idxPcReg != IEMNATIVE_REG_FIXED_TMP0);
5922 Assert(pReNative->Core.fDebugPcInitialized);
5923
5924 /* cmp [pVCpu->iem.s.uPcUpdatingDebug], pcreg */
5925# ifdef RT_ARCH_AMD64
5926 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5927 pCodeBuf[off++] = X86_OP_REX_W | (idxPcReg >= 8 ? X86_OP_REX_R : 0);
5928 pCodeBuf[off++] = 0x3b;
5929 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxPcReg & 7, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5930# else
5931 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5932 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5933 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, idxPcReg);
5934# endif
5935
5936 uint32_t offFixup = off;
5937 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, kIemNativeInstrCond_e);
5938 off = iemNativeEmitBrkEx(pCodeBuf, off, UINT32_C(0x2200));
5939 iemNativeFixupFixedJump(pReNative, offFixup, off);
5940
5941 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5942 return off;
5943}
5944
5945
5946/**
5947 * Checks that the current RIP+offPc matches IEMCPU::uPcUpdatingDebug.
5948 */
5949DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5950{
5951 if (pReNative->Core.fDebugPcInitialized)
5952 {
5953 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc);
5954 if (pReNative->Core.offPc)
5955 {
5956 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5957 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 8);
5958 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxTmpReg, idxPcReg, pReNative->Core.offPc);
5959 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5960 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxTmpReg);
5961 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5962 }
5963 else
5964 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
5965 iemNativeRegFreeTmp(pReNative, idxPcReg);
5966 }
5967 return off;
5968}
5969
5970# endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG */
5971
5972/**
5973 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5974 */
5975DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5976{
5977 Assert(pReNative->Core.offPc);
5978# if !defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && !defined(VBOX_WITH_STATISTICS)
5979 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x\n", pReNative->Core.offPc, off));
5980# else
5981 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
5982 uint8_t idxCurCall = pReNative->idxCurCall;
5983    uint8_t           idxInstr        = pReNative->pTbOrg->Thrd.paCalls[idxCurCall].idxInstr; /* unreliable */
5984 while (idxInstr == 0 && idxInstr + 1 < idxOldInstrPlusOne && idxCurCall > 0)
5985 idxInstr = pReNative->pTbOrg->Thrd.paCalls[--idxCurCall].idxInstr;
5986 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
5987 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
5988 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u\n",
5989 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped));
5990
5991 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
5992
5993# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5994 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5995 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
5996# endif
5997# endif
5998
5999# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6000 /* Allocate a temporary PC register. */
6001 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6002
6003 /* Perform the addition and store the result. */
6004 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6005 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6006# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6007 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
6008# endif
6009
6010 /* Free but don't flush the PC register. */
6011 iemNativeRegFreeTmp(pReNative, idxPcReg);
6012# else
6013 /* Compare the shadow with the context value, they should match. */
6014 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6015 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6016# endif
6017
6018 pReNative->Core.offPc = 0;
6019
6020 return off;
6021}
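/*
 * Conceptual equivalent (comment only, not compiled) of the writeback above, with
 * Core.offPc being a recompile-time constant folded into the emitted add:
 *
 *     pVCpu->cpum.GstCtx.rip += pReNative->Core.offPc;
 *     pReNative->Core.offPc   = 0;   // recompiler-side bookkeeping, not emitted code
 */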
6022
6023#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6024
6025
6026/*********************************************************************************************************************************
6027* Code Emitters (larger snippets) *
6028*********************************************************************************************************************************/
6029
6030/**
6031 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6032 * extending to 64-bit width.
6033 *
6034 * @returns New code buffer offset on success, UINT32_MAX on failure.
6035 * @param   pReNative   The native recompile state.
6036 * @param off The current code buffer position.
6037 * @param idxHstReg The host register to load the guest register value into.
6038 * @param enmGstReg The guest register to load.
6039 *
6040 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6041 * that is something the caller needs to do if applicable.
6042 */
6043DECL_HIDDEN_THROW(uint32_t)
6044iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6045{
6046 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6047 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6048
6049 switch (g_aGstShadowInfo[enmGstReg].cb)
6050 {
6051 case sizeof(uint64_t):
6052 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6053 case sizeof(uint32_t):
6054 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6055 case sizeof(uint16_t):
6056 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6057#if 0 /* not present in the table. */
6058 case sizeof(uint8_t):
6059 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6060#endif
6061 default:
6062 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6063 }
6064}
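/*
 * Illustrative sketch (comment only, not compiled): loading the guest RIP into a
 * temporary host register without establishing shadowing, using helpers from this
 * file:
 *
 *     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *     off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxTmpReg, kIemNativeGstReg_Pc);
 *     ...
 *     iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */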
6065
6066
6067/**
6068 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6069 * extending to 64-bit width, extended version.
6070 *
6071 * @returns New code buffer offset on success, UINT32_MAX on failure.
6072 * @param pCodeBuf The code buffer.
6073 * @param off The current code buffer position.
6074 * @param idxHstReg The host register to load the guest register value into.
6075 * @param enmGstReg The guest register to load.
6076 *
6077 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6078 * that is something the caller needs to do if applicable.
6079 */
6080DECL_HIDDEN_THROW(uint32_t)
6081iemNativeEmitLoadGprWithGstShadowRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6082{
6083 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6084 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6085
6086 switch (g_aGstShadowInfo[enmGstReg].cb)
6087 {
6088 case sizeof(uint64_t):
6089 return iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6090 case sizeof(uint32_t):
6091 return iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6092 case sizeof(uint16_t):
6093 return iemNativeEmitLoadGprFromVCpuU16Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6094#if 0 /* not present in the table. */
6095 case sizeof(uint8_t):
6096 return iemNativeEmitLoadGprFromVCpuU8Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6097#endif
6098 default:
6099#ifdef IEM_WITH_THROW_CATCH
6100 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6101#else
6102 AssertReleaseFailedReturn(off);
6103#endif
6104 }
6105}
6106
6107
6108#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6109/**
6110 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6111 *
6112 * @returns New code buffer offset on success, UINT32_MAX on failure.
6113 * @param pReNative The recompiler state.
6114 * @param off The current code buffer position.
6115 * @param idxHstSimdReg The host register to load the guest register value into.
6116 * @param enmGstSimdReg The guest register to load.
6117 * @param enmLoadSz The load size of the register.
6118 *
6119 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6120 * that is something the caller needs to do if applicable.
6121 */
6122DECL_HIDDEN_THROW(uint32_t)
6123iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6124 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6125{
6126 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6127
6128 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6129 switch (enmLoadSz)
6130 {
6131 case kIemNativeGstSimdRegLdStSz_256:
6132 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6133 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6134 case kIemNativeGstSimdRegLdStSz_Low128:
6135 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6136 case kIemNativeGstSimdRegLdStSz_High128:
6137 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6138 default:
6139 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6140 }
6141}
6142#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6143
6144#ifdef VBOX_STRICT
6145
6146/**
6147 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6148 *
6149 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6150 * Trashes EFLAGS on AMD64.
6151 */
6152DECL_FORCE_INLINE(uint32_t)
6153iemNativeEmitTop32BitsClearCheckEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxReg)
6154{
6155# ifdef RT_ARCH_AMD64
6156 /* rol reg64, 32 */
6157 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6158 pCodeBuf[off++] = 0xc1;
6159 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6160 pCodeBuf[off++] = 32;
6161
6162 /* test reg32, ffffffffh */
6163 if (idxReg >= 8)
6164 pCodeBuf[off++] = X86_OP_REX_B;
6165 pCodeBuf[off++] = 0xf7;
6166 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6167 pCodeBuf[off++] = 0xff;
6168 pCodeBuf[off++] = 0xff;
6169 pCodeBuf[off++] = 0xff;
6170 pCodeBuf[off++] = 0xff;
6171
6172 /* je/jz +1 */
6173 pCodeBuf[off++] = 0x74;
6174 pCodeBuf[off++] = 0x01;
6175
6176 /* int3 */
6177 pCodeBuf[off++] = 0xcc;
6178
6179 /* rol reg64, 32 */
6180 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6181 pCodeBuf[off++] = 0xc1;
6182 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6183 pCodeBuf[off++] = 32;
6184
6185# elif defined(RT_ARCH_ARM64)
6186 /* lsr tmp0, reg64, #32 */
6187 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6188 /* cbz tmp0, +1 */
6189 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6190 /* brk #0x1100 */
6191 pCodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6192
6193# else
6194# error "Port me!"
6195# endif
6196 return off;
6197}
6198
6199
6200/**
6201 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6202 *
6203 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6204 * Trashes EFLAGS on AMD64.
6205 */
6206DECL_HIDDEN_THROW(uint32_t)
6207iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6208{
6209# ifdef RT_ARCH_AMD64
6210 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6211# elif defined(RT_ARCH_ARM64)
6212 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6213# else
6214# error "Port me!"
6215# endif
6216 off = iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6217 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6218 return off;
6219}
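/*
 * Conceptual equivalent (comment only, not compiled) of the check emitted above:
 *
 *     if (uRegValue >> 32)    // any of the top 32 bits set?
 *         breakpoint;         // int3 on AMD64, brk #0x1100 on ARM64
 */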
6220
6221
6222/**
6223 * Emitting code that checks that the content of register @a idxReg is the same
6224 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6225 * instruction if that's not the case.
6226 *
6227 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6228 * Trashes EFLAGS on AMD64.
6229 */
6230DECL_HIDDEN_THROW(uint32_t) iemNativeEmitGuestRegValueCheckEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf,
6231 uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6232{
6233#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6234    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6235 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
6236 return off;
6237#endif
6238
6239# ifdef RT_ARCH_AMD64
6240 /* cmp reg, [mem] */
6241 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6242 {
6243 if (idxReg >= 8)
6244 pCodeBuf[off++] = X86_OP_REX_R;
6245 pCodeBuf[off++] = 0x38;
6246 }
6247 else
6248 {
6249 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6250 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6251 else
6252 {
6253 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6254 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6255 else
6256 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6257 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6258 if (idxReg >= 8)
6259 pCodeBuf[off++] = X86_OP_REX_R;
6260 }
6261 pCodeBuf[off++] = 0x39;
6262 }
6263 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6264
6265 /* je/jz +1 */
6266 pCodeBuf[off++] = 0x74;
6267 pCodeBuf[off++] = 0x01;
6268
6269 /* int3 */
6270 pCodeBuf[off++] = 0xcc;
6271
6272 /* For values smaller than the register size, we must check that the rest
6273 of the register is all zeros. */
6274 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6275 {
6276 /* test reg64, imm32 */
6277 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6278 pCodeBuf[off++] = 0xf7;
6279 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6280 pCodeBuf[off++] = 0;
6281 pCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6282 pCodeBuf[off++] = 0xff;
6283 pCodeBuf[off++] = 0xff;
6284
6285 /* je/jz +1 */
6286 pCodeBuf[off++] = 0x74;
6287 pCodeBuf[off++] = 0x01;
6288
6289 /* int3 */
6290 pCodeBuf[off++] = 0xcc;
6291 }
6292 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6293 iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6294 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6295
6296# elif defined(RT_ARCH_ARM64)
6297 /* mov TMP0, [gstreg] */
6298 off = iemNativeEmitLoadGprWithGstShadowRegEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6299
6300 /* sub tmp0, tmp0, idxReg */
6301 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6302 /* cbz tmp0, +2 */
6303 pCodeBuf[off++] = Armv8A64MkInstrCbz(2, IEMNATIVE_REG_FIXED_TMP0);
6304 /* brk #0x1000+enmGstReg */
6305 pCodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6306 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6307
6308# else
6309# error "Port me!"
6310# endif
6311 return off;
6312}
6313
6314
6315/**
6316 * Emitting code that checks that the content of register @a idxReg is the same
6317 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6318 * instruction if that's not the case.
6319 *
6320 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6321 * Trashes EFLAGS on AMD64.
6322 */
6323DECL_HIDDEN_THROW(uint32_t)
6324iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6325{
6326#ifdef RT_ARCH_AMD64
6327 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6328#elif defined(RT_ARCH_ARM64)
6329 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6330# else
6331# error "Port me!"
6332# endif
6333 return iemNativeEmitGuestRegValueCheckEx(pReNative, pCodeBuf, off, idxReg, enmGstReg);
6334}
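/*
 * Conceptual equivalent (comment only, not compiled) of the check emitted above; for
 * sub-64-bit registers the unused upper bits must be zero as well:
 *
 *     if (uHostRegValue != <value of enmGstReg in CPUMCTX>)
 *         breakpoint;         // int3 on AMD64, brk #0x1000+enmGstReg on ARM64
 */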
6335
6336# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6337# ifdef RT_ARCH_AMD64
6338/**
6339 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
6340 */
6341DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6342{
6343 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6344 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6345 if (idxSimdReg >= 8)
6346 pbCodeBuf[off++] = X86_OP_REX_R;
6347 pbCodeBuf[off++] = 0x0f;
6348 pbCodeBuf[off++] = 0x38;
6349 pbCodeBuf[off++] = 0x29;
6350 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6351
6352 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6353 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6354 pbCodeBuf[off++] = X86_OP_REX_W
6355 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6356 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6357 pbCodeBuf[off++] = 0x0f;
6358 pbCodeBuf[off++] = 0x3a;
6359 pbCodeBuf[off++] = 0x16;
6360 pbCodeBuf[off++] = 0xeb;
6361 pbCodeBuf[off++] = 0x00;
6362
6363 /* cmp tmp0, 0xffffffffffffffff. */
6364 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6365 pbCodeBuf[off++] = 0x83;
6366 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6367 pbCodeBuf[off++] = 0xff;
6368
6369 /* je/jz +1 */
6370 pbCodeBuf[off++] = 0x74;
6371 pbCodeBuf[off++] = 0x01;
6372
6373 /* int3 */
6374 pbCodeBuf[off++] = 0xcc;
6375
6376 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6377 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6378 pbCodeBuf[off++] = X86_OP_REX_W
6379 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6380 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6381 pbCodeBuf[off++] = 0x0f;
6382 pbCodeBuf[off++] = 0x3a;
6383 pbCodeBuf[off++] = 0x16;
6384 pbCodeBuf[off++] = 0xeb;
6385 pbCodeBuf[off++] = 0x01;
6386
6387 /* cmp tmp0, 0xffffffffffffffff. */
6388 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6389 pbCodeBuf[off++] = 0x83;
6390 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6391 pbCodeBuf[off++] = 0xff;
6392
6393 /* je/jz +1 */
6394 pbCodeBuf[off++] = 0x74;
6395 pbCodeBuf[off++] = 0x01;
6396
6397 /* int3 */
6398 pbCodeBuf[off++] = 0xcc;
6399
6400 return off;
6401}
6402# endif
6403
6404
6405/**
6406 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6407 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6408 * instruction if that's not the case.
6409 *
6410 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6411 * Trashes EFLAGS on AMD64.
6412 */
6413DECL_HIDDEN_THROW(uint32_t)
6414iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6415 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6416{
6417    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6418 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6419 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6420 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6421 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6422 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6423 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6424 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6425 return off;
6426
6427# ifdef RT_ARCH_AMD64
6428 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6429 {
6430 /* movdqa vectmp0, idxSimdReg */
6431 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6432
6433 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6434
6435 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6436 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6437 }
6438
6439 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6440 {
6441        /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6442 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6443
6444 /* vextracti128 vectmp0, idxSimdReg, 1 */
6445 pbCodeBuf[off++] = X86_OP_VEX3;
6446 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6447 | X86_OP_VEX3_BYTE1_X
6448 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6449 | 0x03; /* Opcode map */
6450 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6451 pbCodeBuf[off++] = 0x39;
6452 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6453 pbCodeBuf[off++] = 0x01;
6454
6455 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6456 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6457 }
6458# elif defined(RT_ARCH_ARM64)
6459 /* mov vectmp0, [gstreg] */
6460 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6461
6462 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6463 {
6464 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6465 /* eor vectmp0, vectmp0, idxSimdReg */
6466 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6467 /* uaddlv vectmp0, vectmp0.16B */
6468 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6469 /* umov tmp0, vectmp0.H[0] */
6470 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6471 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6472 /* cbz tmp0, +1 */
6473 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6474 /* brk #0x1000+enmGstReg */
6475 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6476 }
6477
6478 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6479 {
6480 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6481 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6482 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6483 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6484 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6485 /* umov tmp0, (vectmp0 + 1).H[0] */
6486 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6487 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6488 /* cbz tmp0, +1 */
6489 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6490 /* brk #0x1000+enmGstReg */
6491 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6492 }
6493
6494# else
6495# error "Port me!"
6496# endif
6497
6498 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6499 return off;
6500}
6501# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6502
6503
6504/**
6505 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6506 * important bits.
6507 *
6508 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6509 * Trashes EFLAGS on AMD64.
6510 */
6511DECL_HIDDEN_THROW(uint32_t)
6512iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6513{
6514 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6515 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6516 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6517 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6518
6519#ifdef RT_ARCH_AMD64
6520 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6521
6522 /* je/jz +1 */
6523 pbCodeBuf[off++] = 0x74;
6524 pbCodeBuf[off++] = 0x01;
6525
6526 /* int3 */
6527 pbCodeBuf[off++] = 0xcc;
6528
6529# elif defined(RT_ARCH_ARM64)
6530 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6531
6532 /* b.eq +1 */
6533 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6534 /* brk #0x2000 */
6535 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6536
6537# else
6538# error "Port me!"
6539# endif
6540 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6541
6542 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6543 return off;
6544}
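/*
 * Conceptual equivalent (comment only, not compiled) of the check emitted above:
 *
 *     if (   (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
 *         != (fExec & IEMTB_F_KEY_MASK))
 *         breakpoint;         // int3 on AMD64, brk #0x2000 on ARM64
 */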
6545
6546#endif /* VBOX_STRICT */
6547
6548
6549#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6550/**
6551 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6552 */
6553DECL_HIDDEN_THROW(uint32_t)
6554iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6555{
6556 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6557
6558 fEflNeeded &= X86_EFL_STATUS_BITS;
6559 if (fEflNeeded)
6560 {
6561# ifdef RT_ARCH_AMD64
6562 /* test dword [pVCpu + offVCpu], imm32 */
6563 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
6564 if (fEflNeeded <= 0xff)
6565 {
6566 pCodeBuf[off++] = 0xf6;
6567 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6568 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6569 }
6570 else
6571 {
6572 pCodeBuf[off++] = 0xf7;
6573 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6574 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6575 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6576 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6577 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6578 }
6579
6580 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 3, kIemNativeInstrCond_e);
6581 pCodeBuf[off++] = 0xcc;
6582
6583 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6584
6585# else
6586 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6587 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6588 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6589# ifdef RT_ARCH_ARM64
6590 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6591 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6592# else
6593# error "Port me!"
6594# endif
6595 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6596# endif
6597 }
6598 return off;
6599}
6600#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
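/*
 * Conceptual equivalent (comment only, not compiled) of the check emitted above:
 *
 *     if (pVCpu->iem.s.fSkippingEFlags & fEflNeeded & X86_EFL_STATUS_BITS)
 *         breakpoint;         // int3 on AMD64, brk #0x7777 on ARM64
 */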
6601
6602
6603/**
6604 * Emits a code for checking the return code of a call and rcPassUp, returning
6605 * from the code if either are non-zero.
6606 */
6607DECL_HIDDEN_THROW(uint32_t)
6608iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6609{
6610#ifdef RT_ARCH_AMD64
6611 /*
6612 * AMD64: eax = call status code.
6613 */
6614
6615 /* edx = rcPassUp */
6616 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6617# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6618 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6619# endif
6620
6621 /* edx = eax | rcPassUp */
6622 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6623 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6624 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6625 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6626
6627 /* Jump to non-zero status return path. */
6628 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, off);
6629
6630 /* done. */
6631
6632#elif RT_ARCH_ARM64
6633 /*
6634 * ARM64: w0 = call status code.
6635 */
6636 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+3+3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
6637
6638# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6639 AssertCompile(ARMV8_A64_REG_X2 == IEMNATIVE_CALL_ARG2_GREG);
6640 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, ARMV8_A64_REG_X2, idxInstr);
6641# endif
6642 off = iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6643
6644 pCodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6645
6646 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, pCodeBuf, off,
6647 ARMV8_A64_REG_X4, true /*f64Bit*/);
6648
6649#else
6650# error "port me"
6651#endif
6652 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6653 RT_NOREF_PV(idxInstr);
6654 return off;
6655}
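/*
 * Conceptual equivalent (comment only, not compiled) of the code emitted above, with
 * rcCall being the helper's return value in the native return register:
 *
 *     if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)
 *         goto NonZeroRetOrPassUp;    // shared tail doing the rc/rcPassUp fiddling
 */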
6656
6657
6658/**
6659 * Emits a call to a CImpl function or something similar.
6660 */
6661DECL_HIDDEN_THROW(uint32_t)
6662iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6663 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6664{
6665 /* Writeback everything. */
6666 off = iemNativeRegFlushPendingWrites(pReNative, off);
6667
6668 /*
6669     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6670 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6671 */
6672 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6673 fGstShwFlush
6674 | RT_BIT_64(kIemNativeGstReg_Pc)
6675 | RT_BIT_64(kIemNativeGstReg_EFlags));
6676 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6677
6678 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6679
6680 /*
6681 * Load the parameters.
6682 */
6683#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6684    /* Special-case the hidden VBOXSTRICTRC pointer. */
6685 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6686 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6687 if (cAddParams > 0)
6688 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6689 if (cAddParams > 1)
6690 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6691 if (cAddParams > 2)
6692 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6693 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6694
6695#else
6696 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6697 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6698 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6699 if (cAddParams > 0)
6700 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6701 if (cAddParams > 1)
6702 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6703 if (cAddParams > 2)
6704# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6705 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6706# else
6707 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6708# endif
6709#endif
6710
6711 /*
6712 * Make the call.
6713 */
6714 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6715
6716#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6717 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6718#endif
6719
6720#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6721 pReNative->Core.fDebugPcInitialized = false;
6722 Log4(("fDebugPcInitialized=false cimpl off=%#x (v2)\n", off));
6723#endif
6724
6725 /*
6726 * Check the status code.
6727 */
6728 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6729}
6730
6731
6732/**
6733 * Emits a call to a threaded worker function.
6734 */
6735DECL_HIDDEN_THROW(uint32_t)
6736iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6737{
6738 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
6739 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6740
6741 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6742 off = iemNativeRegFlushPendingWrites(pReNative, off);
6743
6744 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6745 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6746
6747#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6748 /* The threaded function may throw / long jmp, so set current instruction
6749 number if we're counting. */
6750 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6751#endif
6752
6753 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6754
6755#ifdef RT_ARCH_AMD64
6756 /* Load the parameters and emit the call. */
6757# ifdef RT_OS_WINDOWS
6758# ifndef VBOXSTRICTRC_STRICT_ENABLED
6759 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6760 if (cParams > 0)
6761 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6762 if (cParams > 1)
6763 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6764 if (cParams > 2)
6765 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6766# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6767 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6768 if (cParams > 0)
6769 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6770 if (cParams > 1)
6771 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6772 if (cParams > 2)
6773 {
6774 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6775 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6776 }
6777 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6778# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6779# else
6780 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6781 if (cParams > 0)
6782 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6783 if (cParams > 1)
6784 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6785 if (cParams > 2)
6786 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6787# endif
6788
6789 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6790
6791# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6792 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6793# endif
6794
6795#elif RT_ARCH_ARM64
6796 /*
6797 * ARM64:
6798 */
6799 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6800 if (cParams > 0)
6801 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6802 if (cParams > 1)
6803 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6804 if (cParams > 2)
6805 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6806
6807 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6808
6809#else
6810# error "port me"
6811#endif
6812
6813#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6814 pReNative->Core.fDebugPcInitialized = false;
6815 Log4(("fDebugPcInitialized=false todo off=%#x (v2)\n", off));
6816#endif
6817
6818 /*
6819 * Check the status code.
6820 */
6821 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6822
6823 return off;
6824}
6825
6826
6827/**
6828 * The default liveness function, matching iemNativeEmitThreadedCall.
6829 */
6830IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_ThreadedCall)
6831{
6832 IEM_LIVENESS_RAW_INIT_WITH_CALL(pOutgoing, pIncoming);
6833 RT_NOREF(pCallEntry);
6834}
6835
6836#ifdef VBOX_WITH_STATISTICS
6837
6838/**
6839 * Emits code to update the thread call statistics.
6840 */
6841DECL_INLINE_THROW(uint32_t)
6842iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6843{
6844 /*
6845 * Update threaded function stats.
6846 */
6847 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6848 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6849# if defined(RT_ARCH_ARM64)
6850 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6851 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6852 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6853 iemNativeRegFreeTmp(pReNative, idxTmp1);
6854 iemNativeRegFreeTmp(pReNative, idxTmp2);
6855# else
6856 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6857# endif
6858 return off;
6859}
6860
6861
6862/**
6863 * Emits code to update the TB exit reason statistics.
6864 */
6865DECL_INLINE_THROW(uint32_t)
6866iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6867{
6868 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6869 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6870 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6871 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6872 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6873
6874 return off;
6875}
6876
6877#endif /* VBOX_WITH_STATISTICS */
6878
6879/**
6880 * Worker for iemNativeEmitViaLookupDoOne and iemNativeRecompileAttachExecMemChunkCtx.
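 *
 * Calls @a pfnHelper with pVCpu as the only argument; a NULL return jumps to the
 * ReturnBreak code at @a offReturnBreak, while any other value is taken to be the
 * native code of the next TB and is jumped to directly.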
6881 */
6882static uint32_t
6883iemNativeEmitCoreViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak, uintptr_t pfnHelper)
6884{
6885 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6886 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6887
6888 /* Jump to ReturnBreak if the return register is NULL. */
6889 off = iemNativeEmitTestIfGprIsZeroAndJmpToFixed(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6890 true /*f64Bit*/, offReturnBreak);
6891
6892 /* Okay, continue executing the next TB. */
6893 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6894 return off;
6895}
6896
6897
6898/**
6899 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6900 */
6901static uint32_t iemNativeEmitCoreReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6902{
6903 /* set the return status */
6904 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6905}
6906
6907
6908/**
6909 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6910 */
6911static uint32_t iemNativeEmitCoreReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6912{
6913 /* set the return status */
6914 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6915}
6916
6917
6918/**
6919 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6920 */
6921static uint32_t iemNativeEmitCoreReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6922{
6923 /* set the return status */
6924 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6925}
6926
6927
6928/**
6929 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6930 */
6931static uint32_t iemNativeEmitCoreRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6932{
6933 /*
6934 * Generate the rc + rcPassUp fiddling code.
6935 */
6936 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6937#ifdef RT_ARCH_AMD64
6938# ifdef RT_OS_WINDOWS
6939# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6940 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6941# endif
6942 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6943 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6944# else
6945 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6946 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6947# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6948 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6949# endif
6950# endif
6951# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6952 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6953# endif
6954
6955#else
6956 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6957 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6958 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6959#endif
6960
6961 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6962 return off;
6963}
6964
6965
6966/**
6967 * Emits a standard epilog.
6968 */
6969static uint32_t iemNativeEmitCoreEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6970{
6971 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6972
6973 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
6974 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6975
6976 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6977 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6978
6979 /*
6980 * Restore registers and return.
6981 */
6982#ifdef RT_ARCH_AMD64
6983 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6984
6985 /* Reposition esp at the r15 restore point. */
6986 pbCodeBuf[off++] = X86_OP_REX_W;
6987 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6988 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6989 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
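    /* IEMNATIVE_FP_OFF_LAST_PUSH should be the (negative) offset of the last GPR pushed by
       the prolog, i.e. the saved r15, so the pops below undo the pushes in reverse order:
       rbx + r12-r15 (5 regs) on SysV and additionally rsi + rdi (7 regs) on Windows. */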
6990
6991 /* Pop non-volatile registers and return */
6992 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6993 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6994 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6995 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6996 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6997 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6998 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6999 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7000# ifdef RT_OS_WINDOWS
7001 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7002 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7003# endif
7004 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7005 pbCodeBuf[off++] = 0xc9; /* leave */
7006 pbCodeBuf[off++] = 0xc3; /* ret */
7007 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7008
7009#elif RT_ARCH_ARM64
7010 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7011
7012    /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
7013 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7014 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7015 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7016 IEMNATIVE_FRAME_VAR_SIZE / 8);
7017 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7018 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7019 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7020 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7021 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7022 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7023 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7024 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7025 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7026 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7027 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7028 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
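    /* I.e. the register save area covers x19-x28 plus BP and LR: 12 registers, 96 bytes,
       all of which the single stack pointer adjustment below releases. */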
7029
7030 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7031 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7032 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7033 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7034
7035 /* retab / ret */
7036# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7037 if (1)
7038 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7039 else
7040# endif
7041 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7042
7043#else
7044# error "port me"
7045#endif
7046 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7047
7048 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
7049 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
7050
7051 return off;
7052}
7053
7054
7055
7056/*********************************************************************************************************************************
7057* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7058*********************************************************************************************************************************/
7059
7060/**
7061 * Internal work that allocates a variable with kind set to
7062 * kIemNativeVarKind_Invalid and no current stack allocation.
7063 *
7064 * The kind will either be set by the caller or later when the variable is first
7065 * assigned a value.
7066 *
7067 * @returns Unpacked index.
7068 * @internal
7069 */
7070static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7071{
7072 Assert(cbType > 0 && cbType <= 64);
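    /* ASMBitFirstSetU32(~bmVars) - 1 yields the lowest clear bit, i.e. the first free
       variable index; e.g. with bmVars == 0x00000007 the allocation below picks idxVar 3. */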
7073 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7074 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7075 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7076 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7077 pReNative->Core.aVars[idxVar].cbVar = cbType;
7078 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7079 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7080 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7081 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7082 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7083 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7084 pReNative->Core.aVars[idxVar].u.uValue = 0;
7085#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7086 pReNative->Core.aVars[idxVar].fSimdReg = false;
7087#endif
7088 return idxVar;
7089}
7090
7091
7092/**
7093 * Internal work that allocates an argument variable w/o setting enmKind.
7094 *
7095 * @returns Unpacked index.
7096 * @internal
7097 */
7098static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7099{
7100 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7101 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7102 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7103
7104 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7105 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7106 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7107 return idxVar;
7108}
7109
7110
7111/**
7112 * Gets the stack slot for a stack variable, allocating one if necessary.
7113 *
7114 * Calling this function implies that the stack slot will contain a valid
7115 * variable value. The caller deals with any register currently assigned to the
7116 * variable, typically by spilling it into the stack slot.
7117 *
7118 * @returns The stack slot number.
7119 * @param pReNative The recompiler state.
7120 * @param idxVar The variable.
7121 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7122 */
7123DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7124{
7125 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7126 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7127 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7128
7129 /* Already got a slot? */
7130 uint8_t const idxStackSlot = pVar->idxStackSlot;
7131 if (idxStackSlot != UINT8_MAX)
7132 {
7133 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7134 return idxStackSlot;
7135 }
7136
7137 /*
7138 * A single slot is easy to allocate.
7139 * Allocate them from the top end, closest to BP, to reduce the displacement.
7140 */
7141 if (pVar->cbVar <= sizeof(uint64_t))
7142 {
7143 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7144 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7145 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7146 pVar->idxStackSlot = (uint8_t)iSlot;
7147 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7148 return (uint8_t)iSlot;
7149 }
7150
7151 /*
7152 * We need more than one stack slot.
7153 *
7154 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7155 */
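    /* Worked example (for illustration): a 32 byte variable gives fBitAlignMask = 3 and
       fBitAllocMask = 0xf, so the loop below looks for four consecutive free slots at a
       4-slot aligned position, scanning downwards from the top of bmStack. */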
7156 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7157 Assert(pVar->cbVar <= 64);
7158 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7159 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7160 uint32_t bmStack = pReNative->Core.bmStack;
7161 while (bmStack != UINT32_MAX)
7162 {
7163 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7164 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7165 iSlot = (iSlot - 1) & ~fBitAlignMask;
7166 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7167 {
7168 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7169 pVar->idxStackSlot = (uint8_t)iSlot;
7170 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7171 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7172 return (uint8_t)iSlot;
7173 }
7174
7175 bmStack |= (fBitAllocMask << iSlot);
7176 }
7177 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7178}
7179
7180
7181/**
7182 * Changes the variable to a stack variable.
7183 *
7184 * Currently this is only possible to do the first time the variable is used;
7185 * switching later can be implemented but hasn't been done.
7186 *
7187 * @param pReNative The recompiler state.
7188 * @param idxVar The variable.
7189 * @throws VERR_IEM_VAR_IPE_2
7190 */
7191DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7192{
7193 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7194 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7195 if (pVar->enmKind != kIemNativeVarKind_Stack)
7196 {
7197 /* We could in theory transition from immediate to stack as well, but it
7198 would involve the caller doing work storing the value on the stack. So,
7199 till that's required we only allow transition from invalid. */
7200 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7201 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7202 pVar->enmKind = kIemNativeVarKind_Stack;
7203
7204 /* Note! We don't allocate a stack slot here, that's only done when a
7205 slot is actually needed to hold a variable value. */
7206 }
7207}
7208
7209
7210/**
7211 * Sets the variable to a constant value.
7212 *
7213 * This does not require stack storage as we know the value and can always
7214 * reload it, unless of course it's referenced.
7215 *
7216 * @param pReNative The recompiler state.
7217 * @param idxVar The variable.
7218 * @param uValue The immediate value.
7219 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7220 */
7221DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7222{
7223 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7224 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7225 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7226 {
7227 /* Only simple transitions for now. */
7228 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7229 pVar->enmKind = kIemNativeVarKind_Immediate;
7230 }
7231 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7232
7233 pVar->u.uValue = uValue;
7234 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7235 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7236 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7237}
7238
7239
7240/**
7241 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7242 *
7243 * This does not require stack storage as we know the value and can always
7244 * reload it. Loading is postponed till needed.
7245 *
7246 * @param pReNative The recompiler state.
7247 * @param idxVar The variable. Unpacked.
7248 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7249 *
7250 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7251 * @internal
7252 */
7253static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7254{
7255 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7256 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7257
7258 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7259 {
7260 /* Only simple transitions for now. */
7261 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7262 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7263 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7264 }
7265 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7266
7267 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7268
7269 /* Update the other variable, ensure it's a stack variable. */
7270 /** @todo handle variables with const values... that'll go boom now. */
7271 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7272 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7273}
7274
7275
7276/**
7277 * Sets the variable to a reference (pointer) to a guest register reference.
7278 *
7279 * This does not require stack storage as we know the value and can always
7280 * reload it. Loading is postponed till needed.
7281 *
7282 * @param pReNative The recompiler state.
7283 * @param idxVar The variable.
7284 * @param   enmRegClass     The class of guest registers to reference.
7285 * @param idxReg The register within @a enmRegClass to reference.
7286 *
7287 * @throws VERR_IEM_VAR_IPE_2
7288 */
7289DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7290 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7291{
7292 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7293 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7294
7295 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7296 {
7297 /* Only simple transitions for now. */
7298 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7299 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7300 }
7301 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7302
7303 pVar->u.GstRegRef.enmClass = enmRegClass;
7304 pVar->u.GstRegRef.idx = idxReg;
7305}
7306
7307
7308DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7309{
7310 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7311}
7312
7313
7314DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7315{
7316 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7317
7318 /* Since we're using a generic uint64_t value type, we must truncate it if
7319       the variable is smaller, otherwise we may end up with too large a value
7320       when scaling up an imm8 w/ sign-extension.
7321
7322       This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7323       in the BIOS, bx=1) when running on arm, because clang expects 16-bit
7324 register parameters to have bits 16 and up set to zero. Instead of
7325 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7326 CF value in the result. */
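    /* E.g. cbType == sizeof(uint16_t) with uValue == UINT64_C(0xffffffffffffffff)
       (a sign-extended -1) is reduced to 0xffff by the switch below. */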
7327 switch (cbType)
7328 {
7329 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7330 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7331 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7332 }
7333 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7334 return idxVar;
7335}
7336
7337
7338DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7339{
7340 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7341 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7342 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7343 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7344 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7345 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7346
7347 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7348 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7349 return idxArgVar;
7350}
7351
7352
7353DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7354{
7355 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7356    /* Don't set to stack now, leave that to the first use since, for instance,
7357 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7358 return idxVar;
7359}
7360
7361
7362DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7363{
7364 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7365
7366 /* Since we're using a generic uint64_t value type, we must truncate it if
7367       the variable is smaller, otherwise we may end up with too large a value
7368       when scaling up an imm8 w/ sign-extension. */
7369 switch (cbType)
7370 {
7371 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7372 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7373 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7374 }
7375 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7376 return idxVar;
7377}
7378
7379
7380DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
7381 uint8_t cbType, uint8_t idxVarOther)
7382{
7383 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7384 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7385
7386 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
7387 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7388
7389/** @todo combine MOV and AND using MOVZX/similar. */
7390 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7391
7392    /* Truncate the value to this variable's size. */
7393 switch (cbType)
7394 {
7395 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7396 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7397 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7398 }
7399
7400 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7401 iemNativeVarRegisterRelease(pReNative, idxVar);
7402 return idxVar;
7403}
7404
7405
7406/**
7407 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7408 * fixed till we call iemNativeVarRegisterRelease.
7409 *
7410 * @returns The host register number.
7411 * @param pReNative The recompiler state.
7412 * @param idxVar The variable.
7413 * @param poff Pointer to the instruction buffer offset.
7414 * In case a register needs to be freed up or the value
7415 * loaded off the stack.
7416 * @param fInitialized Set if the variable must already have been
7417 * initialized. Will throw VERR_IEM_VAR_NOT_INITIALIZED
7418 * if this is not the case.
7419 * @param idxRegPref Preferred register number or UINT8_MAX.
7420 *
7421 * @note Must not modify the host status flags!
7422 */
7423DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7424 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7425{
7426 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7427 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7428 Assert(pVar->cbVar <= 8);
7429 Assert(!pVar->fRegAcquired);
7430
7431 uint8_t idxReg = pVar->idxReg;
7432 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7433 {
7434 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7435 && pVar->enmKind < kIemNativeVarKind_End);
7436 pVar->fRegAcquired = true;
7437 return idxReg;
7438 }
7439
7440 /*
7441 * If the kind of variable has not yet been set, default to 'stack'.
7442 */
7443 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7444 && pVar->enmKind < kIemNativeVarKind_End);
7445 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7446 iemNativeVarSetKindToStack(pReNative, idxVar);
7447
7448 /*
7449      * We have to allocate a register for the variable, even if it's a stack one,
7450      * as we don't know if there are modifications being made to it before it's
7451      * finalized (todo: analyze and insert hints about that?).
7452      *
7453      * If we can, we try to get the correct register for argument variables. This
7454      * is assuming that most argument variables are fetched as close as possible
7455      * to the actual call, so that there aren't any interfering hidden calls
7456      * (memory accesses, etc) in between.
7457      *
7458      * If we cannot, or it's not an argument variable, we make sure no argument
7459      * registers that will be used by this MC block will be allocated here, and
7460      * we always prefer non-volatile registers to avoid needing to spill stuff
7461      * for internal calls.
7462 */
7463 /** @todo Detect too early argument value fetches and warn about hidden
7464 * calls causing less optimal code to be generated in the python script. */
7465
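    /* Summary of the selection below: (1) an argument variable whose dedicated call
       register (g_aidxIemNativeCallRegs[uArgNo]) is currently free takes that register;
       (2) failing that, a valid and free idxRegPref is honoured; (3) otherwise a free
       register not shadowing any guest register is picked, preferring non-volatile ones;
       (4) as a last resort iemNativeRegAllocFindFree frees one up. */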
7466 uint8_t const uArgNo = pVar->uArgNo;
7467 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7468 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7469 {
7470 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7471
7472#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7473 /* Writeback any dirty shadow registers we are about to unshadow. */
7474 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7475#endif
7476
7477 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7478 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7479 }
7480 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7481 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7482 {
7483 /** @todo there must be a better way for this and boot cArgsX? */
7484 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7485 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7486 & ~pReNative->Core.bmHstRegsWithGstShadow
7487 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7488 & fNotArgsMask;
7489 if (fRegs)
7490 {
7491            /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
7492 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7493 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7494 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7495 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7496 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7497 }
7498 else
7499 {
7500 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7501 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7502 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7503 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7504 }
7505 }
7506 else
7507 {
7508 idxReg = idxRegPref;
7509 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7510 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7511 }
7512 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7513 pVar->idxReg = idxReg;
7514
7515#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7516 pVar->fSimdReg = false;
7517#endif
7518
7519 /*
7520 * Load it off the stack if we've got a stack slot.
7521 */
7522 uint8_t const idxStackSlot = pVar->idxStackSlot;
7523 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7524 {
7525 Assert(fInitialized);
7526 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7527 switch (pVar->cbVar)
7528 {
7529 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7530 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7531 case 3: AssertFailed(); RT_FALL_THRU();
7532 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7533 default: AssertFailed(); RT_FALL_THRU();
7534 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7535 }
7536 }
7537 else
7538 {
7539 Assert(idxStackSlot == UINT8_MAX);
7540 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7541 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7542 else
7543 {
7544 /*
7545 * Convert from immediate to stack/register. This is currently only
7546 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7547 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7548 */
7549 AssertStmt(fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7550 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7551 idxVar, idxReg, pVar->u.uValue));
7552 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7553 pVar->enmKind = kIemNativeVarKind_Stack;
7554 }
7555 }
7556
7557 pVar->fRegAcquired = true;
7558 return idxReg;
7559}
7560
7561
7562#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7563/**
7564 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7565 * fixed till we call iemNativeVarRegisterRelease.
7566 *
7567 * @returns The host register number.
7568 * @param pReNative The recompiler state.
7569 * @param idxVar The variable.
7570 * @param poff Pointer to the instruction buffer offset.
7571 * In case a register needs to be freed up or the value
7572 * loaded off the stack.
7573 * @param fInitialized Set if the variable must already have been initialized.
7574 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7575 * the case.
7576 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7577 */
7578DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7579 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7580{
7581 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7582 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7583 Assert( pVar->cbVar == sizeof(RTUINT128U)
7584 || pVar->cbVar == sizeof(RTUINT256U));
7585 Assert(!pVar->fRegAcquired);
7586
7587 uint8_t idxReg = pVar->idxReg;
7588 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7589 {
7590 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7591 && pVar->enmKind < kIemNativeVarKind_End);
7592 pVar->fRegAcquired = true;
7593 return idxReg;
7594 }
7595
7596 /*
7597 * If the kind of variable has not yet been set, default to 'stack'.
7598 */
7599 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7600 && pVar->enmKind < kIemNativeVarKind_End);
7601 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7602 iemNativeVarSetKindToStack(pReNative, idxVar);
7603
7604 /*
7605      * We have to allocate a register for the variable, even if it's a stack one,
7606      * as we don't know if there are modifications being made to it before it's
7607      * finalized (todo: analyze and insert hints about that?).
7608      *
7609      * If we can, we try to get the correct register for argument variables. This
7610      * is assuming that most argument variables are fetched as close as possible
7611      * to the actual call, so that there aren't any interfering hidden calls
7612      * (memory accesses, etc) in between.
7613      *
7614      * If we cannot, or it's not an argument variable, we make sure no argument
7615      * registers that will be used by this MC block will be allocated here, and
7616      * we always prefer non-volatile registers to avoid needing to spill stuff
7617      * for internal calls.
7618 */
7619 /** @todo Detect too early argument value fetches and warn about hidden
7620 * calls causing less optimal code to be generated in the python script. */
7621
7622 uint8_t const uArgNo = pVar->uArgNo;
7623 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7624
7625    /* SIMD is a bit simpler for now because there is no support for arguments. */
7626 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7627 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7628 {
7629 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7630 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7631 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7632 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7633 & fNotArgsMask;
7634 if (fRegs)
7635 {
7636 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7637 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7638 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7639 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7640 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7641 }
7642 else
7643 {
7644 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7645 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7646 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7647 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7648 }
7649 }
7650 else
7651 {
7652 idxReg = idxRegPref;
7653 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7654 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7655 }
7656 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7657
7658 pVar->fSimdReg = true;
7659 pVar->idxReg = idxReg;
7660
7661 /*
7662 * Load it off the stack if we've got a stack slot.
7663 */
7664 uint8_t const idxStackSlot = pVar->idxStackSlot;
7665 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7666 {
7667 Assert(fInitialized);
7668 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7669 switch (pVar->cbVar)
7670 {
7671 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7672 default: AssertFailed(); RT_FALL_THRU();
7673 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7674 }
7675 }
7676 else
7677 {
7678 Assert(idxStackSlot == UINT8_MAX);
7679 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7680 }
7681 pVar->fRegAcquired = true;
7682 return idxReg;
7683}
7684#endif
7685
7686
7687/**
7688 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7689 * guest register.
7690 *
7691 * This function makes sure there is a register for it and sets it to be the
7692 * current shadow copy of @a enmGstReg.
7693 *
7694 * @returns The host register number.
7695 * @param pReNative The recompiler state.
7696 * @param idxVar The variable.
7697 * @param enmGstReg The guest register this variable will be written to
7698 * after this call.
7699 * @param poff Pointer to the instruction buffer offset.
7700 * In case a register needs to be freed up or if the
7701 * variable content needs to be loaded off the stack.
7702 *
7703 * @note We DO NOT expect @a idxVar to be an argument variable,
7704  *       because this function is only used in the commit stage of an
7705  *       instruction.
7706 */
7707DECL_HIDDEN_THROW(uint8_t)
7708iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7709{
7710 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7711 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7712 Assert(!pVar->fRegAcquired);
7713 AssertMsgStmt( pVar->cbVar <= 8
7714 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7715 || pVar->enmKind == kIemNativeVarKind_Stack),
7716 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7717 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7718 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7719
7720 /*
7721 * This shouldn't ever be used for arguments, unless it's in a weird else
7722 * branch that doesn't do any calling and even then it's questionable.
7723 *
7724 * However, in case someone writes crazy wrong MC code and does register
7725 * updates before making calls, just use the regular register allocator to
7726 * ensure we get a register suitable for the intended argument number.
7727 */
7728 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7729
7730 /*
7731 * If there is already a register for the variable, we transfer/set the
7732 * guest shadow copy assignment to it.
7733 */
7734 uint8_t idxReg = pVar->idxReg;
7735 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7736 {
7737#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7738 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7739 {
7740# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7741 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7742 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7743# endif
7744 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7745 }
7746#endif
7747
7748 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7749 {
7750 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7751 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7752 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7753 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7754 }
7755 else
7756 {
7757 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7758 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7759 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7760 }
7761 /** @todo figure this one out. We need some way of making sure the register isn't
7762 * modified after this point, just in case we start writing crappy MC code. */
7763 pVar->enmGstReg = enmGstReg;
7764 pVar->fRegAcquired = true;
7765 return idxReg;
7766 }
7767 Assert(pVar->uArgNo == UINT8_MAX);
7768
7769 /*
7770      * Because this is supposed to be the commit stage, we just tag along with the
7771 * temporary register allocator and upgrade it to a variable register.
7772 */
7773 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7774 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7775 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7776 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7777 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7778 pVar->idxReg = idxReg;
7779
7780 /*
7781 * Now we need to load the register value.
7782 */
7783 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7784 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7785 else
7786 {
7787 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7788 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7789 switch (pVar->cbVar)
7790 {
7791 case sizeof(uint64_t):
7792 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7793 break;
7794 case sizeof(uint32_t):
7795 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7796 break;
7797 case sizeof(uint16_t):
7798 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7799 break;
7800 case sizeof(uint8_t):
7801 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7802 break;
7803 default:
7804 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7805 }
7806 }
7807
7808 pVar->fRegAcquired = true;
7809 return idxReg;
7810}
7811
7812
7813/**
7814 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7815 *
7816 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7817 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7818 * requirement of flushing anything in volatile host registers when making a
7819 * call.
7820 *
7821 * @returns New @a off value.
7822 * @param pReNative The recompiler state.
7823 * @param off The code buffer position.
7824 * @param fHstRegsNotToSave Set of registers not to save & restore.
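 *
 * Typical usage sketch (pfnSomeTlbMissHelper and fArgRegs are illustrative only):
 * @code
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fArgRegs);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeTlbMissHelper);
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fArgRegs);
 * @endcode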
7825 */
7826DECL_HIDDEN_THROW(uint32_t)
7827iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7828{
7829 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
7830 if (fHstRegs)
7831 {
7832 do
7833 {
7834 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7835 fHstRegs &= ~RT_BIT_32(idxHstReg);
7836
7837 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7838 {
7839 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7840 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7841 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7842 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7843 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7844 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7845 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7846 {
7847 case kIemNativeVarKind_Stack:
7848 {
7849 /* Temporarily spill the variable register. */
7850 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7851 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7852 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7853 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7854 continue;
7855 }
7856
7857 case kIemNativeVarKind_Immediate:
7858 case kIemNativeVarKind_VarRef:
7859 case kIemNativeVarKind_GstRegRef:
7860 /* It is weird to have any of these loaded at this point. */
7861 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7862 continue;
7863
7864 case kIemNativeVarKind_End:
7865 case kIemNativeVarKind_Invalid:
7866 break;
7867 }
7868 AssertFailed();
7869 }
7870 else
7871 {
7872 /*
7873 * Allocate a temporary stack slot and spill the register to it.
7874 */
7875 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7876 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7877 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7878 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7879 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7880 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7881 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7882 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7883 }
7884 } while (fHstRegs);
7885 }
7886#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7887
7888 /*
7889      * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
7890      * which would be more difficult anyway as they span multiple stack slots and have different sizes
7891 * (besides we only have a limited amount of slots at the moment).
7892 *
7893      * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted by
7894 * the callee. This asserts that the registers were written back earlier and are not in the dirty state.
7895 */
7896 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7897
7898 fHstRegs = pReNative->Core.bmHstSimdRegs & (IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
7899 if (fHstRegs)
7900 {
7901 do
7902 {
7903 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7904 fHstRegs &= ~RT_BIT_32(idxHstReg);
7905
7906 /* Fixed reserved and temporary registers don't need saving. */
7907 /*Assert( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedReserved
7908 && pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedTmp); included below */
7909 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7910
7911 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7912 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7913 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7914 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7915 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7916 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7917 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7918 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7919 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7920 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7921 {
7922 case kIemNativeVarKind_Stack:
7923 {
7924 /* Temporarily spill the variable register. */
7925 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7926 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7927 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7928 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7929 if (cbVar == sizeof(RTUINT128U))
7930 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7931 else
7932 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7933 continue;
7934 }
7935
7936 case kIemNativeVarKind_Immediate:
7937 case kIemNativeVarKind_VarRef:
7938 case kIemNativeVarKind_GstRegRef:
7939 /* It is weird to have any of these loaded at this point. */
7940 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7941 continue;
7942
7943 case kIemNativeVarKind_End:
7944 case kIemNativeVarKind_Invalid:
7945 break;
7946 }
7947 AssertFailed();
7948 } while (fHstRegs);
7949 }
7950#endif
7951 return off;
7952}
7953
7954
7955/**
7956 * Emit code to restore volatile registers after a call to a helper.
7957 *
7958 * @returns New @a off value.
7959 * @param pReNative The recompiler state.
7960 * @param off The code buffer position.
7961 * @param fHstRegsNotToSave Set of registers not to save & restore.
7962 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7963 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7964 */
7965DECL_HIDDEN_THROW(uint32_t)
7966iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7967{
7968 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
7969 if (fHstRegs)
7970 {
7971 do
7972 {
7973 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7974 fHstRegs &= ~RT_BIT_32(idxHstReg);
7975
7976 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7977 {
7978 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7979 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7980 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7981 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7982 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7983 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7984 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7985 {
7986 case kIemNativeVarKind_Stack:
7987 {
7988 /* Unspill the variable register. */
7989 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7990 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7991 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7992 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7993 continue;
7994 }
7995
7996 case kIemNativeVarKind_Immediate:
7997 case kIemNativeVarKind_VarRef:
7998 case kIemNativeVarKind_GstRegRef:
7999 /* It is weird to have any of these loaded at this point. */
8000 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8001 continue;
8002
8003 case kIemNativeVarKind_End:
8004 case kIemNativeVarKind_Invalid:
8005 break;
8006 }
8007 AssertFailed();
8008 }
8009 else
8010 {
8011 /*
8012 * Restore from temporary stack slot.
8013 */
8014 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8015 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8016 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8017 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8018
8019 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8020 }
8021 } while (fHstRegs);
8022 }
8023#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8024 fHstRegs = pReNative->Core.bmHstSimdRegs & (IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
8025 if (fHstRegs)
8026 {
8027 do
8028 {
8029 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8030 fHstRegs &= ~RT_BIT_32(idxHstReg);
8031
8032 /*Assert( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedTmp
8033 && pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedReserved); - included below. */
8034 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8035
8036 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8037 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8038 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8039 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8040 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8041 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8042 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8043 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8044 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8045 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8046 {
8047 case kIemNativeVarKind_Stack:
8048 {
8049 /* Unspill the variable register. */
8050 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8051 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8052 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8053 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8054
8055 if (cbVar == sizeof(RTUINT128U))
8056 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8057 else
8058 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8059 continue;
8060 }
8061
8062 case kIemNativeVarKind_Immediate:
8063 case kIemNativeVarKind_VarRef:
8064 case kIemNativeVarKind_GstRegRef:
8065 /* It is weird to have any of these loaded at this point. */
8066 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8067 continue;
8068
8069 case kIemNativeVarKind_End:
8070 case kIemNativeVarKind_Invalid:
8071 break;
8072 }
8073 AssertFailed();
8074 } while (fHstRegs);
8075 }
8076#endif
8077 return off;
8078}
8079
8080
8081/**
8082 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
8083 *
8084 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8085 *
8086 * ASSUMES that @a idxVar is valid and unpacked.
8087 */
8088DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8089{
8090 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8091 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8092 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8093 {
8094 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8095 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8096 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
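    /* E.g. cbVar == 32 gives cSlots == 4 and fAllocMask == 0xf, clearing the same
       four bits that iemNativeVarGetStackSlot set when the slots were allocated. */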
8097 Assert(cSlots > 0);
8098 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8099 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8100 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8101 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8102 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8103 }
8104 else
8105 Assert(idxStackSlot == UINT8_MAX);
8106}
8107
8108
8109/**
8110 * Worker that frees a single variable.
8111 *
8112 * ASSUMES that @a idxVar is valid and unpacked.
8113 */
8114DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8115{
8116 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8117 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8118 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8119
8120 /* Free the host register first if any assigned. */
8121 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8122#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8123 if ( idxHstReg != UINT8_MAX
8124 && pReNative->Core.aVars[idxVar].fSimdReg)
8125 {
8126 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8127 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8128 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8129 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8130 }
8131 else
8132#endif
8133 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8134 {
8135 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8136 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8137 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8138 }
8139
8140 /* Free argument mapping. */
8141 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8142 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8143 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8144
8145 /* Free the stack slots. */
8146 iemNativeVarFreeStackSlots(pReNative, idxVar);
8147
8148 /* Free the actual variable. */
8149 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8150 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8151}
8152
8153
8154/**
8155 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8156 */
8157DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8158{
8159 while (bmVars != 0)
8160 {
8161 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8162 bmVars &= ~RT_BIT_32(idxVar);
8163
8164#if 1 /** @todo optimize by simplifying this later... */
8165 iemNativeVarFreeOneWorker(pReNative, idxVar);
8166#else
8167 /* Only need to free the host register, the rest is done as bulk updates below. */
8168 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8169 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8170 {
8171 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8172 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8173 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8174 }
8175#endif
8176 }
8177#if 0 /** @todo optimize by simplifying this later... */
8178 pReNative->Core.bmVars = 0;
8179 pReNative->Core.bmStack = 0;
8180 pReNative->Core.u64ArgVars = UINT64_MAX;
8181#endif
8182}
8183
8184
8185
8186/*********************************************************************************************************************************
8187* Emitters for IEM_MC_CALL_CIMPL_XXX *
8188*********************************************************************************************************************************/
8189
8190/**
8191 * Emits code to load a reference (address) of the given guest register into @a idxGprDst.
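 *
 * (Illustrative example, added for clarity: kIemNativeGstRegRef_Gpr with
 * idxRegInClass=3 makes @a idxGprDst point at pVCpu->cpum.GstCtx.aGRegs[3],
 * i.e. the CPUMCTX member offset added to the fixed pVCpu/pCpumCtx register.)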
8192 */
8193DECL_HIDDEN_THROW(uint32_t)
8194iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8195 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8196{
8197#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8198 /** @todo If we are ever going to allow referencing the RIP register, we need to update the guest value here. */
8199#endif
8200
8201 /*
8202 * Get the offset relative to the CPUMCTX structure.
8203 */
8204 uint32_t offCpumCtx;
8205 switch (enmClass)
8206 {
8207 case kIemNativeGstRegRef_Gpr:
8208 Assert(idxRegInClass < 16);
8209 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8210 break;
8211
8212 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
8213 Assert(idxRegInClass < 4);
8214 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8215 break;
8216
8217 case kIemNativeGstRegRef_EFlags:
8218 Assert(idxRegInClass == 0);
8219 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8220 break;
8221
8222 case kIemNativeGstRegRef_MxCsr:
8223 Assert(idxRegInClass == 0);
8224 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8225 break;
8226
8227 case kIemNativeGstRegRef_FpuReg:
8228 Assert(idxRegInClass < 8);
8229 AssertFailed(); /** @todo what kind of indexing? */
8230 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8231 break;
8232
8233 case kIemNativeGstRegRef_MReg:
8234 Assert(idxRegInClass < 8);
8235 AssertFailed(); /** @todo what kind of indexing? */
8236 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8237 break;
8238
8239 case kIemNativeGstRegRef_XReg:
8240 Assert(idxRegInClass < 16);
8241 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8242 break;
8243
8244 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8245 Assert(idxRegInClass == 0);
8246 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8247 break;
8248
8249 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8250 Assert(idxRegInClass == 0);
8251 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8252 break;
8253
8254 default:
8255 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8256 }
8257
8258 /*
8259 * Load the value into the destination register.
8260 */
8261#ifdef RT_ARCH_AMD64
8262 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8263
8264#elif defined(RT_ARCH_ARM64)
8265 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8266 Assert(offCpumCtx < 4096);
8267 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8268
8269#else
8270# error "Port me!"
8271#endif
8272
8273 return off;
8274}
8275
8276
8277/**
8278 * Common code for CIMPL and AIMPL calls.
8279 *
8280 * These are calls that use argument variables and such. They should not be
8281 * confused with internal calls required to implement an MC operation,
8282 * like a TLB load and similar.
8283 *
8284 * Upon return all that is left to do is to load any hidden arguments and
8285 * perform the call. All argument variables are freed.
8286 *
8287 * @returns New code buffer offset; throws VBox status code on error.
8288 * @param pReNative The native recompile state.
8289 * @param off The code buffer offset.
8290 * @param cArgs The total number of arguments (includes hidden
8291 * count).
8292 * @param cHiddenArgs The number of hidden arguments. The hidden
8293 * arguments must not have any variable declared for
8294 * them, whereas all the regular arguments must
8295 * (tstIEMCheckMc ensures this).
8296 * @param fFlushPendingWrites Flag whether to flush pending writes (default true);
8297 * even when false, pending writes in call-volatile registers are still flushed.
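 *
 * @remarks Illustrative sketch added for clarity (not lifted from the actual
 *          callers): after this function returns, a caller typically loads the
 *          hidden argument(s) and emits the call itself, roughly like this,
 *          where pfnHlp is a hypothetical helper pointer:
 * @code
 *      off = iemNativeEmitCallCommon(pReNative, off, cArgs, cHiddenArgs);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHlp);
 * @endcode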
8298 */
8299DECL_HIDDEN_THROW(uint32_t)
8300iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8301 bool fFlushPendingWrites /*= true*/)
8302{
8303#ifdef VBOX_STRICT
8304 /*
8305 * Assert sanity.
8306 */
8307 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8308 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8309 for (unsigned i = 0; i < cHiddenArgs; i++)
8310 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8311 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8312 {
8313 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8314 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8315 }
8316 iemNativeRegAssertSanity(pReNative);
8317#endif
8318
8319 /* We don't know what the called function makes use of, so flush any pending register writes. */
8320 RT_NOREF(fFlushPendingWrites);
8321#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8322 if (fFlushPendingWrites)
8323#endif
8324 off = iemNativeRegFlushPendingWrites(pReNative, off);
8325
8326 /*
8327 * Before we do anything else, go over variables that are referenced and
8328 * make sure they are not in a register.
8329 */
8330 uint32_t bmVars = pReNative->Core.bmVars;
8331 if (bmVars)
8332 {
8333 do
8334 {
8335 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8336 bmVars &= ~RT_BIT_32(idxVar);
8337
8338 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8339 {
8340 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8341#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8342 if ( idxRegOld != UINT8_MAX
8343 && pReNative->Core.aVars[idxVar].fSimdReg)
8344 {
8345 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8346 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8347
8348 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8349 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8350 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8351 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8352 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8353 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8354 else
8355 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8356
8357 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8358 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8359
8360 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8361 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8362 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8363 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8364 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8365 }
8366 else
8367#endif
8368 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8369 {
8370 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8371 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8372 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8373 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8374 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8375
8376 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8377 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8378 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8379 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8380 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8381 }
8382 }
8383 } while (bmVars != 0);
8384#if 0 //def VBOX_STRICT
8385 iemNativeRegAssertSanity(pReNative);
8386#endif
8387 }
8388
8389 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8390
8391#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8392 /*
8393 * As the very first step, go over the host registers that will be used for arguments
8394 * and make sure they don't shadow anything which needs writing back first.
8395 */
8396 for (uint32_t i = 0; i < cRegArgs; i++)
8397 {
8398 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8399
8400 /* Writeback any dirty guest shadows before using this register. */
8401 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8402 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8403 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8404 }
8405#endif
8406
8407 /*
8408 * First, go over the host registers that will be used for arguments and make
8409 * sure they either hold the desired argument or are free.
8410 */
8411 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8412 {
8413 for (uint32_t i = 0; i < cRegArgs; i++)
8414 {
8415 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8416 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8417 {
8418 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8419 {
8420 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8421 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8422 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8423 Assert(pVar->idxReg == idxArgReg);
8424 uint8_t const uArgNo = pVar->uArgNo;
8425 if (uArgNo == i)
8426 { /* perfect */ }
8427 /* The variable allocator logic should make sure this is impossible,
8428 except for when the return register is used as a parameter (ARM,
8429 but not x86). */
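 /* (Added note: on ARM64, for instance, x0 is both the return register and the
    first argument register, so a variable currently living there may have to be
    moved to its final argument register.) */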
8430#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8431 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8432 {
8433# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8434# error "Implement this"
8435# endif
8436 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8437 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8438 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8439 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8440 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8441 }
8442#endif
8443 else
8444 {
8445 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8446
8447 if (pVar->enmKind == kIemNativeVarKind_Stack)
8448 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8449 else
8450 {
8451 /* just free it, can be reloaded if used again */
8452 pVar->idxReg = UINT8_MAX;
8453 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8454 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8455 }
8456 }
8457 }
8458 else
8459 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8460 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8461 }
8462 }
8463#if 0 //def VBOX_STRICT
8464 iemNativeRegAssertSanity(pReNative);
8465#endif
8466 }
8467
8468 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8469
8470#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8471 /*
8472 * If there are any stack arguments, make sure they are in their place as well.
8473 *
8474 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8475 * the caller) will be loading it later and it must be free (see the first loop).
8476 */
8477 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8478 {
8479 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8480 {
8481 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8482 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8483 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8484 {
8485 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8486 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8487 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8488 pVar->idxReg = UINT8_MAX;
8489 }
8490 else
8491 {
8492 /* Use ARG0 as temp for stuff we need registers for. */
8493 switch (pVar->enmKind)
8494 {
8495 case kIemNativeVarKind_Stack:
8496 {
8497 uint8_t const idxStackSlot = pVar->idxStackSlot;
8498 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8499 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8500 iemNativeStackCalcBpDisp(idxStackSlot));
8501 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8502 continue;
8503 }
8504
8505 case kIemNativeVarKind_Immediate:
8506 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8507 continue;
8508
8509 case kIemNativeVarKind_VarRef:
8510 {
8511 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8512 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8513 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8514 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8515 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8516# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8517 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8518 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8519 if ( fSimdReg
8520 && idxRegOther != UINT8_MAX)
8521 {
8522 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8523 if (cbVar == sizeof(RTUINT128U))
8524 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8525 else
8526 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8527 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8528 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8529 }
8530 else
8531# endif
8532 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8533 {
8534 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8535 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8536 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8537 }
8538 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8539 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8540 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8541 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8542 continue;
8543 }
8544
8545 case kIemNativeVarKind_GstRegRef:
8546 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8547 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8548 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8549 continue;
8550
8551 case kIemNativeVarKind_Invalid:
8552 case kIemNativeVarKind_End:
8553 break;
8554 }
8555 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8556 }
8557 }
8558# if 0 //def VBOX_STRICT
8559 iemNativeRegAssertSanity(pReNative);
8560# endif
8561 }
8562#else
8563 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8564#endif
8565
8566 /*
8567 * Make sure the argument variables are loaded into their respective registers.
8568 *
8569 * We can optimize this by ASSUMING that any register allocations are for
8570 * registers that have already been loaded and are ready. The previous step
8571 * saw to that.
8572 */
8573 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8574 {
8575 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8576 {
8577 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8578 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8579 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8580 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8581 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8582 else
8583 {
8584 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8585 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8586 {
8587 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8588 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8589 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8590 | RT_BIT_32(idxArgReg);
8591 pVar->idxReg = idxArgReg;
8592 }
8593 else
8594 {
8595 /* Use ARG0 as temp for stuff we need registers for. */
8596 switch (pVar->enmKind)
8597 {
8598 case kIemNativeVarKind_Stack:
8599 {
8600 uint8_t const idxStackSlot = pVar->idxStackSlot;
8601 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8602 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8603 continue;
8604 }
8605
8606 case kIemNativeVarKind_Immediate:
8607 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8608 continue;
8609
8610 case kIemNativeVarKind_VarRef:
8611 {
8612 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8613 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8614 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8615 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8616 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8617 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8618#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8619 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8620 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8621 if ( fSimdReg
8622 && idxRegOther != UINT8_MAX)
8623 {
8624 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8625 if (cbVar == sizeof(RTUINT128U))
8626 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8627 else
8628 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8629 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8630 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8631 }
8632 else
8633#endif
8634 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8635 {
8636 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8637 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8638 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8639 }
8640 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8641 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8642 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8643 continue;
8644 }
8645
8646 case kIemNativeVarKind_GstRegRef:
8647 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8648 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8649 continue;
8650
8651 case kIemNativeVarKind_Invalid:
8652 case kIemNativeVarKind_End:
8653 break;
8654 }
8655 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8656 }
8657 }
8658 }
8659#if 0 //def VBOX_STRICT
8660 iemNativeRegAssertSanity(pReNative);
8661#endif
8662 }
8663#ifdef VBOX_STRICT
8664 else
8665 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8666 {
8667 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8668 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8669 }
8670#endif
8671
8672 /*
8673 * Free all argument variables (simplified).
8674 * Their lifetime always expires with the call they are for.
8675 */
8676 /** @todo Make the python script check that arguments aren't used after
8677 * IEM_MC_CALL_XXXX. */
8678 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8679 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8680 * an argument value. There is also some FPU stuff. */
8681 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8682 {
8683 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8684 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8685
8686 /* no need to free registers: */
8687 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8688 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8689 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8690 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8691 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8692 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8693
8694 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8695 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8696 iemNativeVarFreeStackSlots(pReNative, idxVar);
8697 }
8698 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8699
8700 /*
8701 * Flush volatile registers as we make the call.
8702 */
8703 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8704
8705 return off;
8706}
8707
8708
8709
8710/*********************************************************************************************************************************
8711* TLB Lookup. *
8712*********************************************************************************************************************************/
8713
8714/**
8715 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
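 *
 * (Added note, derived from the unpacking below: @a uSegAndSizeAndAccessAndDisp
 * packs the segment register index into bits 0..7, the access size into bits
 * 8..15, the IEM_ACCESS_XXX flags into bits 16..31 and the displacement into
 * bits 32..39.)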
8716 */
8717DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint64_t uSegAndSizeAndAccessAndDisp)
8718{
8719 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccessAndDisp);
8720 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccessAndDisp);
8721 uint32_t const fAccess = (uint32_t)uSegAndSizeAndAccessAndDisp >> 16;
8722 uint8_t const offDisp = RT_BYTE5(uSegAndSizeAndAccessAndDisp);
8723 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64+%#x LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, offDisp, cbMem, fAccess, uResult));
8724
8725 /* Do the lookup manually. */
8726 RTGCPTR const GCPtrFlat = (iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base) + offDisp;
8727 uint64_t const uTagNoRev = IEMTLB_CALC_TAG_NO_REV(GCPtrFlat);
8728 PCIEMTLBENTRY pTlbe = IEMTLB_TAG_TO_EVEN_ENTRY(&pVCpu->iem.s.DataTlb, uTagNoRev);
8729 if (RT_LIKELY( pTlbe->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevision)
8730 || (pTlbe = pTlbe + 1)->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevisionGlobal)))
8731 {
8732 /*
8733 * Check TLB page table level access flags.
8734 */
8735 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8736 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8737 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8738 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8739 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8740 | IEMTLBE_F_PG_UNASSIGNED
8741 | IEMTLBE_F_PT_NO_ACCESSED
8742 | fNoWriteNoDirty | fNoUser);
8743 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8744 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8745 {
8746 /*
8747 * Return the address.
8748 */
8749 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8750 if ((uintptr_t)pbAddr == uResult)
8751 return;
8752 RT_NOREF(cbMem);
8753 AssertFailed();
8754 }
8755 else
8756 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8757 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8758 }
8759 else
8760 AssertFailed();
8761 RT_BREAKPOINT();
8762}
8763
8764/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8765
8766
8767
8768/*********************************************************************************************************************************
8769* Recompiler Core. *
8770*********************************************************************************************************************************/
8771
8772/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8773static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8774{
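 /* (Added note: the opcode bytes are supplied up front via
    DISInstrWithPrefetchedBytes, so this callback merely zero-fills anything
    requested beyond them and reports that no further data is available.) */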
8775 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8776 pDis->cbCachedInstr += cbMaxRead;
8777 RT_NOREF(cbMinRead);
8778 return VERR_NO_DATA;
8779}
8780
8781
8782DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8783{
8784 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8785 {
8786#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8787 ENTRY(fLocalForcedActions),
8788 ENTRY(iem.s.rcPassUp),
8789 ENTRY(iem.s.fExec),
8790 ENTRY(iem.s.pbInstrBuf),
8791 ENTRY(iem.s.uInstrBufPc),
8792 ENTRY(iem.s.GCPhysInstrBuf),
8793 ENTRY(iem.s.cbInstrBufTotal),
8794 ENTRY(iem.s.idxTbCurInstr),
8795 ENTRY(iem.s.fSkippingEFlags),
8796#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
8797 ENTRY(iem.s.uPcUpdatingDebug),
8798#endif
8799#ifdef VBOX_WITH_STATISTICS
8800 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8801 ENTRY(iem.s.StatNativeTlbHitsForStore),
8802 ENTRY(iem.s.StatNativeTlbHitsForStack),
8803 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8804 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8805 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8806 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8807 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8808#endif
8809 ENTRY(iem.s.DataTlb.uTlbRevision),
8810 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8811 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8812 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8813 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8814 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8815 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8816 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8817 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8818 ENTRY(iem.s.DataTlb.aEntries),
8819 ENTRY(iem.s.CodeTlb.uTlbRevision),
8820 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8821 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8822 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8823 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8824 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8825 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8826 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8827 ENTRY(iem.s.CodeTlb.aEntries),
8828 ENTRY(pVMR3),
8829 ENTRY(cpum.GstCtx.rax),
8830 ENTRY(cpum.GstCtx.ah),
8831 ENTRY(cpum.GstCtx.rcx),
8832 ENTRY(cpum.GstCtx.ch),
8833 ENTRY(cpum.GstCtx.rdx),
8834 ENTRY(cpum.GstCtx.dh),
8835 ENTRY(cpum.GstCtx.rbx),
8836 ENTRY(cpum.GstCtx.bh),
8837 ENTRY(cpum.GstCtx.rsp),
8838 ENTRY(cpum.GstCtx.rbp),
8839 ENTRY(cpum.GstCtx.rsi),
8840 ENTRY(cpum.GstCtx.rdi),
8841 ENTRY(cpum.GstCtx.r8),
8842 ENTRY(cpum.GstCtx.r9),
8843 ENTRY(cpum.GstCtx.r10),
8844 ENTRY(cpum.GstCtx.r11),
8845 ENTRY(cpum.GstCtx.r12),
8846 ENTRY(cpum.GstCtx.r13),
8847 ENTRY(cpum.GstCtx.r14),
8848 ENTRY(cpum.GstCtx.r15),
8849 ENTRY(cpum.GstCtx.es.Sel),
8850 ENTRY(cpum.GstCtx.es.u64Base),
8851 ENTRY(cpum.GstCtx.es.u32Limit),
8852 ENTRY(cpum.GstCtx.es.Attr),
8853 ENTRY(cpum.GstCtx.cs.Sel),
8854 ENTRY(cpum.GstCtx.cs.u64Base),
8855 ENTRY(cpum.GstCtx.cs.u32Limit),
8856 ENTRY(cpum.GstCtx.cs.Attr),
8857 ENTRY(cpum.GstCtx.ss.Sel),
8858 ENTRY(cpum.GstCtx.ss.u64Base),
8859 ENTRY(cpum.GstCtx.ss.u32Limit),
8860 ENTRY(cpum.GstCtx.ss.Attr),
8861 ENTRY(cpum.GstCtx.ds.Sel),
8862 ENTRY(cpum.GstCtx.ds.u64Base),
8863 ENTRY(cpum.GstCtx.ds.u32Limit),
8864 ENTRY(cpum.GstCtx.ds.Attr),
8865 ENTRY(cpum.GstCtx.fs.Sel),
8866 ENTRY(cpum.GstCtx.fs.u64Base),
8867 ENTRY(cpum.GstCtx.fs.u32Limit),
8868 ENTRY(cpum.GstCtx.fs.Attr),
8869 ENTRY(cpum.GstCtx.gs.Sel),
8870 ENTRY(cpum.GstCtx.gs.u64Base),
8871 ENTRY(cpum.GstCtx.gs.u32Limit),
8872 ENTRY(cpum.GstCtx.gs.Attr),
8873 ENTRY(cpum.GstCtx.rip),
8874 ENTRY(cpum.GstCtx.eflags),
8875 ENTRY(cpum.GstCtx.uRipInhibitInt),
8876 ENTRY(cpum.GstCtx.cr0),
8877 ENTRY(cpum.GstCtx.cr4),
8878 ENTRY(cpum.GstCtx.aXcr[0]),
8879 ENTRY(cpum.GstCtx.aXcr[1]),
8880#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8881 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8882 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8883 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8884 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8885 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8886 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8887 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8888 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8889 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8890 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8891 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8892 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8893 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8894 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8895 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8896 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8897 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8898 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8899 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8900 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8901 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8902 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8903 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8904 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8905 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8906 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8907 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8908 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8909 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8910 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8911 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8912 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8913 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8914#endif
8915#undef ENTRY
8916 };
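 /* (Added note: the table above must be sorted by ascending offset; both the
    VBOX_STRICT order check and the binary lookup below rely on that.) */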
8917#ifdef VBOX_STRICT
8918 static bool s_fOrderChecked = false;
8919 if (!s_fOrderChecked)
8920 {
8921 s_fOrderChecked = true;
8922 uint32_t offPrev = s_aMembers[0].off;
8923 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8924 {
8925 Assert(s_aMembers[i].off > offPrev);
8926 offPrev = s_aMembers[i].off;
8927 }
8928 }
8929#endif
8930
8931 /*
8932 * Binary lookup.
8933 */
8934 unsigned iStart = 0;
8935 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8936 for (;;)
8937 {
8938 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8939 uint32_t const offCur = s_aMembers[iCur].off;
8940 if (off < offCur)
8941 {
8942 if (iCur != iStart)
8943 iEnd = iCur;
8944 else
8945 break;
8946 }
8947 else if (off > offCur)
8948 {
8949 if (iCur + 1 < iEnd)
8950 iStart = iCur + 1;
8951 else
8952 break;
8953 }
8954 else
8955 return s_aMembers[iCur].pszName;
8956 }
8957#ifdef VBOX_WITH_STATISTICS
8958 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8959 return "iem.s.acThreadedFuncStats[iFn]";
8960#endif
8961 return NULL;
8962}
8963
8964
8965/**
8966 * Translates a label to a name.
8967 */
8968static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode /*= false*/)
8969{
8970 switch (enmLabel)
8971 {
8972#define STR_CASE_CMN(a_Label) case kIemNativeLabelType_ ## a_Label: return fCommonCode ? "Chunk_" #a_Label : #a_Label;
8973 STR_CASE_CMN(Invalid);
8974 STR_CASE_CMN(RaiseDe);
8975 STR_CASE_CMN(RaiseUd);
8976 STR_CASE_CMN(RaiseSseRelated);
8977 STR_CASE_CMN(RaiseAvxRelated);
8978 STR_CASE_CMN(RaiseSseAvxFpRelated);
8979 STR_CASE_CMN(RaiseNm);
8980 STR_CASE_CMN(RaiseGp0);
8981 STR_CASE_CMN(RaiseMf);
8982 STR_CASE_CMN(RaiseXf);
8983 STR_CASE_CMN(ObsoleteTb);
8984 STR_CASE_CMN(NeedCsLimChecking);
8985 STR_CASE_CMN(CheckBranchMiss);
8986 STR_CASE_CMN(ReturnSuccess);
8987 STR_CASE_CMN(ReturnBreak);
8988 STR_CASE_CMN(ReturnBreakFF);
8989 STR_CASE_CMN(ReturnWithFlags);
8990 STR_CASE_CMN(ReturnBreakViaLookup);
8991 STR_CASE_CMN(ReturnBreakViaLookupWithIrq);
8992 STR_CASE_CMN(ReturnBreakViaLookupWithTlb);
8993 STR_CASE_CMN(ReturnBreakViaLookupWithTlbAndIrq);
8994 STR_CASE_CMN(NonZeroRetOrPassUp);
8995#undef STR_CASE_CMN
8996#define STR_CASE_LBL(a_Label) case kIemNativeLabelType_ ## a_Label: return #a_Label;
8997 STR_CASE_LBL(LoopJumpTarget);
8998 STR_CASE_LBL(If);
8999 STR_CASE_LBL(Else);
9000 STR_CASE_LBL(Endif);
9001 STR_CASE_LBL(CheckIrq);
9002 STR_CASE_LBL(TlbLookup);
9003 STR_CASE_LBL(TlbMiss);
9004 STR_CASE_LBL(TlbDone);
9005 case kIemNativeLabelType_End: break;
9006 }
9007 return NULL;
9008}
9009
9010
9011 /** Info for the symbol resolver used when disassembling. */
9012typedef struct IEMNATIVDISASMSYMCTX
9013{
9014 PVMCPU pVCpu;
9015 PCIEMTB pTb;
9016 PCIEMNATIVEPERCHUNKCTX pCtx;
9017#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9018 PCIEMTBDBG pDbgInfo;
9019#endif
9020} IEMNATIVDISASMSYMCTX;
9021typedef IEMNATIVDISASMSYMCTX *PIEMNATIVDISASMSYMCTX;
9022
9023
9024/**
9025 * Resolve address to symbol, if we can.
9026 */
9027static const char *iemNativeDisasmGetSymbol(PIEMNATIVDISASMSYMCTX pSymCtx, uintptr_t uAddress, char *pszBuf, size_t cbBuf)
9028{
9029 PCIEMTB const pTb = pSymCtx->pTb;
9030 uintptr_t const offNative = (uAddress - (uintptr_t)pTb->Native.paInstructions) / sizeof(IEMNATIVEINSTR);
9031 if (offNative <= pTb->Native.cInstructions)
9032 {
9033#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9034 /*
9035 * Scan debug info for a matching label.
9036 * Since the debug info should be 100% linear, we can do a binary search here.
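 * (Added note: only kIemTbDbgEntryType_NativeOffset entries carry a native code
 * offset, so each probe below first seeks the nearest such record.)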
9037 */
9038 PCIEMTBDBG const pDbgInfo = pSymCtx->pDbgInfo;
9039 if (pDbgInfo)
9040 {
9041 uint32_t const cEntries = pDbgInfo->cEntries;
9042 uint32_t idxEnd = cEntries;
9043 uint32_t idxStart = 0;
9044 for (;;)
9045 {
9046 /* Find a NativeOffset record close to the midpoint. */
9047 uint32_t idx = idxStart + (idxEnd - idxStart) / 2;
9048 while (idx > idxStart && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9049 idx--;
9050 if (pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9051 {
9052 idx = idxStart + (idxEnd - idxStart) / 2 + 1;
9053 while (idx < idxEnd && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9054 idx++;
9055 if (idx >= idxEnd)
9056 break;
9057 }
9058
9059 /* Do the binary searching thing. */
9060 if (offNative < pDbgInfo->aEntries[idx].NativeOffset.offNative)
9061 {
9062 if (idx > idxStart)
9063 idxEnd = idx;
9064 else
9065 break;
9066 }
9067 else if (offNative > pDbgInfo->aEntries[idx].NativeOffset.offNative)
9068 {
9069 idx += 1;
9070 if (idx < idxEnd)
9071 idxStart = idx;
9072 else
9073 break;
9074 }
9075 else
9076 {
9077 /* Got a matching offset, scan forward till we hit a label, but
9078 stop when the native offset changes. */
9079 while (++idx < cEntries)
9080 switch (pDbgInfo->aEntries[idx].Gen.uType)
9081 {
9082 case kIemTbDbgEntryType_Label:
9083 {
9084 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)pDbgInfo->aEntries[idx].Label.enmLabel;
9085 const char * const pszName = iemNativeGetLabelName(enmLabel);
9086 if (enmLabel < kIemNativeLabelType_FirstWithMultipleInstances)
9087 return pszName;
9088 RTStrPrintf(pszBuf, cbBuf, "%s_%u", pszName, pDbgInfo->aEntries[idx].Label.uData);
9089 return pszBuf;
9090 }
9091
9092 case kIemTbDbgEntryType_NativeOffset:
9093 if (pDbgInfo->aEntries[idx].NativeOffset.offNative != offNative)
9094 return NULL;
9095 break;
9096 }
9097 break;
9098 }
9099 }
9100 }
9101#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9102 }
9103 else
9104 {
9105 PCIEMNATIVEPERCHUNKCTX const pChunkCtx = pSymCtx->pCtx;
9106 if (pChunkCtx)
9107 for (uint32_t i = 1; i < RT_ELEMENTS(pChunkCtx->apExitLabels); i++)
9108 if ((PIEMNATIVEINSTR)uAddress == pChunkCtx->apExitLabels[i])
9109 return iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true /*fCommonCode*/);
9110 }
9111 RT_NOREF(pszBuf, cbBuf);
9112 return NULL;
9113}
9114
9115#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9116
9117/**
9118 * @callback_method_impl{FNDISGETSYMBOL}
9119 */
9120static DECLCALLBACK(int) iemNativeDisasmGetSymbolCb(PCDISSTATE pDis, uint32_t u32Sel, RTUINTPTR uAddress,
9121 char *pszBuf, size_t cchBuf, RTINTPTR *poff, void *pvUser)
9122{
9123 const char * const pszSym = iemNativeDisasmGetSymbol((PIEMNATIVDISASMSYMCTX)pvUser, uAddress, pszBuf, cchBuf);
9124 if (pszSym)
9125 {
9126 *poff = 0;
9127 if (pszSym != pszBuf)
9128 return RTStrCopy(pszBuf, cchBuf, pszSym);
9129 return VINF_SUCCESS;
9130 }
9131 RT_NOREF(pDis, u32Sel);
9132 return VERR_SYMBOL_NOT_FOUND;
9133}
9134
9135#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9136
9137/**
9138 * Annotates an instruction decoded by the capstone disassembler.
9139 */
9140static const char *
9141iemNativeDisasmAnnotateCapstone(PIEMNATIVDISASMSYMCTX pSymCtx, cs_insn const *pInstr, char *pszBuf, size_t cchBuf)
9142{
9143# if defined(RT_ARCH_ARM64)
9144 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9145 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9146 {
9147 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9148 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
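 /* (Added note: x28 holds pVCpu and x27 holds &pVCpu->cpum.GstCtx, so offsets
    based on x27 are rebased onto VMCPU before the name lookup below.) */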
9149 char const *psz = strchr(pInstr->op_str, '[');
9150 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9151 {
9152 uint32_t const offVCpu = psz[3] == '8'? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9153 int32_t off = -1;
9154 psz += 4;
9155 if (*psz == ']')
9156 off = 0;
9157 else if (*psz == ',')
9158 {
9159 psz = RTStrStripL(psz + 1);
9160 if (*psz == '#')
9161 off = RTStrToInt32(&psz[1]);
9162 /** @todo deal with index registers and LSL as well... */
9163 }
9164 if (off >= 0)
9165 return iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9166 }
9167 }
9168 else if (pInstr->id == ARM64_INS_B || pInstr->id == ARM64_INS_BL)
9169 {
9170 const char *pszAddr = strchr(pInstr->op_str, '#');
9171 if (pszAddr)
9172 {
9173 uint64_t uAddr = RTStrToUInt64(pszAddr + 1);
9174 if (uAddr != 0)
9175 return iemNativeDisasmGetSymbol(pSymCtx, uAddr, pszBuf, cchBuf);
9176 }
9177 }
9178# endif
9179 RT_NOREF(pSymCtx, pInstr, pszBuf, cchBuf);
9180 return NULL;
9181}
9182#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9183
9184
9185DECLHIDDEN(void) iemNativeDisassembleTb(PVMCPU pVCpu, PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9186{
9187 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9188#if defined(RT_ARCH_AMD64)
9189 static const char * const a_apszMarkers[] =
9190 {
9191 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9192 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9193 };
9194#endif
9195
9196 char szDisBuf[512];
9197 DISSTATE Dis;
9198 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9199 uint32_t const cNative = pTb->Native.cInstructions;
9200 uint32_t offNative = 0;
9201#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9202 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9203#endif
9204 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9205 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9206 : DISCPUMODE_64BIT;
9207#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9208 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb), pDbgInfo };
9209#else
9210 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb) };
9211#endif
9212#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9213 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9214#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9215 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9216#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9217# error "Port me"
9218#else
9219 csh hDisasm = ~(size_t)0;
9220# if defined(RT_ARCH_AMD64)
9221 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9222# elif defined(RT_ARCH_ARM64)
9223 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9224# else
9225# error "Port me"
9226# endif
9227 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9228
9229 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9230 //Assert(rcCs == CS_ERR_OK);
9231#endif
9232
9233 /*
9234 * Print TB info.
9235 */
9236 pHlp->pfnPrintf(pHlp,
9237 "pTb=%p: GCPhysPc=%RGp (%%%RGv) cInstructions=%u LB %#x cRanges=%u\n"
9238 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9239 pTb, pTb->GCPhysPc,
9240#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9241 pTb->pDbgInfo ? pTb->pDbgInfo->FlatPc : RTGCPTR_MAX,
9242#else
9243 pTb->FlatPc,
9244#endif
9245 pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9246 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9247#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9248 if (pDbgInfo && pDbgInfo->cEntries > 1)
9249 {
9250 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9251
9252 /*
9253 * This disassembly is driven by the debug info, which follows the native
9254 * code and indicates where the next guest instruction starts, where
9255 * labels are, and similar things.
9256 */
9257 uint32_t idxThreadedCall = 0;
9258 uint32_t idxGuestInstr = 0;
9259 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9260 uint8_t idxRange = UINT8_MAX;
9261 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9262 uint32_t offRange = 0;
9263 uint32_t offOpcodes = 0;
9264 uint32_t const cbOpcodes = pTb->cbOpcodes;
9265 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9266 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9267 uint32_t iDbgEntry = 1;
9268 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9269
9270 while (offNative < cNative)
9271 {
9272 /* If we're at or have passed the point where the next chunk of debug
9273 info starts, process it. */
9274 if (offDbgNativeNext <= offNative)
9275 {
9276 offDbgNativeNext = UINT32_MAX;
9277 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9278 {
9279 switch ((IEMTBDBGENTRYTYPE)pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9280 {
9281 case kIemTbDbgEntryType_GuestInstruction:
9282 {
9283 /* Did the exec flag change? */
9284 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9285 {
9286 pHlp->pfnPrintf(pHlp,
9287 " fExec change %#08x -> %#08x %s\n",
9288 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9289 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9290 szDisBuf, sizeof(szDisBuf)));
9291 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9292 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9293 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9294 : DISCPUMODE_64BIT;
9295 }
9296
9297 /* New opcode range? We need to fend off a spurious debug info entry here for cases
9298 where the compilation was aborted before the opcode was recorded and the actual
9299 instruction was translated to a threaded call. This may happen when we run out
9300 of ranges, or when some complicated interrupts/FFs are found to be pending or
9301 similar. So, we just deal with it here rather than in the compiler code as it
9302 is a lot simpler to do here. */
9303 if ( idxRange == UINT8_MAX
9304 || idxRange >= cRanges
9305 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9306 {
9307 idxRange += 1;
9308 if (idxRange < cRanges)
9309 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9310 else
9311 continue;
9312 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9313 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9314 + (pTb->aRanges[idxRange].idxPhysPage == 0
9315 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9316 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9317 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9318 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9319 pTb->aRanges[idxRange].idxPhysPage);
9320 GCPhysPc += offRange;
9321 }
9322
9323 /* Disassemble the instruction. */
9324 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9325 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9326 uint32_t cbInstr = 1;
9327 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9328 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9329 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9330 if (RT_SUCCESS(rc))
9331 {
9332 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9333 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9334 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9335 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9336
9337 static unsigned const s_offMarker = 55;
9338 static char const s_szMarker[] = " ; <--- guest";
9339 if (cch < s_offMarker)
9340 {
9341 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9342 cch = s_offMarker;
9343 }
9344 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9345 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9346
9347 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %s #%u\n", GCPhysPc, szDisBuf, idxGuestInstr);
9348 }
9349 else
9350 {
9351 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9352 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9353 cbInstr = 1;
9354 }
9355 idxGuestInstr++;
9356 GCPhysPc += cbInstr;
9357 offOpcodes += cbInstr;
9358 offRange += cbInstr;
9359 continue;
9360 }
9361
9362 case kIemTbDbgEntryType_ThreadedCall:
9363 pHlp->pfnPrintf(pHlp,
9364 " Call #%u to %s (%u args) - %s\n",
9365 idxThreadedCall,
9366 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9367 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9368 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9369 idxThreadedCall++;
9370 continue;
9371
9372 case kIemTbDbgEntryType_GuestRegShadowing:
9373 {
9374 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9375 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9376 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9377 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9378 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9379 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9380 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9381 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9382 else
9383 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9384 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9385 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9386 continue;
9387 }
9388
9389# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9390 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9391 {
9392 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9393 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9394 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9395 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9396 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9397 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9398 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9399 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9400 else
9401 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9402 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9403 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9404 continue;
9405 }
9406# endif
9407
9408 case kIemTbDbgEntryType_Label:
9409 {
9410 const char *pszName = iemNativeGetLabelName((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel);
9411 if (pDbgInfo->aEntries[iDbgEntry].Label.enmLabel >= kIemNativeLabelType_FirstWithMultipleInstances)
9412 {
9413 const char *pszComment = pDbgInfo->aEntries[iDbgEntry].Label.enmLabel == kIemNativeLabelType_Else
9414 ? " ; regs state restored pre-if-block" : "";
9415 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9416 }
9417 else
9418 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9419 continue;
9420 }
9421
9422 case kIemTbDbgEntryType_NativeOffset:
9423 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9424 Assert(offDbgNativeNext >= offNative);
9425 break;
9426
9427# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9428 case kIemTbDbgEntryType_DelayedPcUpdate:
9429 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9430 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9431 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9432 continue;
9433# endif
9434
9435# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9436 case kIemTbDbgEntryType_GuestRegDirty:
9437 {
9438 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9439 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9440 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9441 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9442 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9443 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9444 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9445 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9446 pszGstReg, pszHstReg);
9447 continue;
9448 }
9449
9450 case kIemTbDbgEntryType_GuestRegWriteback:
9451 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX32)\n",
9452 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9453 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9454 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9455 continue;
9456# endif
9457
9458# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9459 case kIemTbDbgEntryType_PostponedEFlagsCalc:
9460 {
9461 const char *pszOp = "!unknown!";
9462 switch ((IEMNATIVE_POSTPONED_EFL_OP_T)pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.enmOp)
9463 {
9464 case kIemNativePostponedEflOp_Logical: pszOp = "logical"; break;
9465 case kIemNativePostponedEflOp_Invalid: break;
9466 case kIemNativePostponedEflOp_End: break;
9467 }
9468 pHlp->pfnPrintf(pHlp, " Postponed EFLAGS calc #%u: %s %u bits\n",
9469 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.idxEmit, pszOp,
9470 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.cOpBits);
9471 continue;
9472 }
9473# endif
9474 default:
9475 AssertFailed();
9476 continue;
9477 }
9478 /* Break out of the loop at kIemTbDbgEntryType_NativeOffset. */
9479 iDbgEntry++;
9480 break;
9481 }
9482 }
9483
9484 /*
9485 * Disassemble the next native instruction.
9486 */
9487 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9488# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9489 uint32_t cbInstr = sizeof(paNative[0]);
9490 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9491 if (RT_SUCCESS(rc))
9492 {
9493# if defined(RT_ARCH_AMD64)
9494 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9495 {
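 /* (Added note: the marker payload is the 32-bit immediate of the 7 byte NOP.
    If its high word is a valid threaded function index, the low 15 bits give
    the call number and bit 15 the 'recompiled' flag; otherwise, ignoring bit
    31, it is an index into a_apszMarkers.) */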
9496 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9497 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9498 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9499 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9500 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9501 uInfo & 0x8000 ? "recompiled" : "todo");
9502 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9503 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9504 else
9505 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9506 }
9507 else
9508# endif
9509 {
9510 const char *pszAnnotation = NULL;
9511# ifdef RT_ARCH_AMD64
9512 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9513 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9514 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9515 iemNativeDisasmGetSymbolCb, &SymCtx);
9516 PCDISOPPARAM pMemOp;
9517 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[0].fUse))
9518 pMemOp = &Dis.aParams[0];
9519 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[1].fUse))
9520 pMemOp = &Dis.aParams[1];
9521 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[2].fUse))
9522 pMemOp = &Dis.aParams[2];
9523 else
9524 pMemOp = NULL;
9525 if ( pMemOp
9526 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9527 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9528 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9529 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9530
9531# elif defined(RT_ARCH_ARM64)
9532 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9533 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9534 iemNativeDisasmGetSymbolCb, &SymCtx);
9535# else
9536# error "Port me"
9537# endif
9538 if (pszAnnotation)
9539 {
9540 static unsigned const s_offAnnotation = 55;
9541 size_t const cchAnnotation = strlen(pszAnnotation);
9542 size_t cchDis = strlen(szDisBuf);
9543 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9544 {
9545 if (cchDis < s_offAnnotation)
9546 {
9547 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9548 cchDis = s_offAnnotation;
9549 }
9550 szDisBuf[cchDis++] = ' ';
9551 szDisBuf[cchDis++] = ';';
9552 szDisBuf[cchDis++] = ' ';
9553 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9554 }
9555 }
9556 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9557 }
9558 }
9559 else
9560 {
9561# if defined(RT_ARCH_AMD64)
9562 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9563 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9564# elif defined(RT_ARCH_ARM64)
9565 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9566# else
9567# error "Port me"
9568# endif
9569 cbInstr = sizeof(paNative[0]);
9570 }
9571 offNative += cbInstr / sizeof(paNative[0]);
9572
9573# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9574 cs_insn *pInstr;
9575 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9576 (uintptr_t)pNativeCur, 1, &pInstr);
9577 if (cInstrs > 0)
9578 {
9579 Assert(cInstrs == 1);
9580 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9581 size_t const cchOp = strlen(pInstr->op_str);
9582# if defined(RT_ARCH_AMD64)
9583 if (pszAnnotation)
9584 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9585 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9586 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9587 else
9588 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9589 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9590
9591# else
9592 if (pszAnnotation)
9593 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9594 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9595 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9596 else
9597 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9598 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9599# endif
9600 offNative += pInstr->size / sizeof(*pNativeCur);
9601 cs_free(pInstr, cInstrs);
9602 }
9603 else
9604 {
9605# if defined(RT_ARCH_AMD64)
9606 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9607 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9608# else
9609 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9610# endif
9611 offNative++;
9612 }
9613# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9614 }
9615 }
9616 else
9617#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9618 {
9619 /*
9620 * No debug info, just disassemble the x86 code and then the native code.
9621 *
9622 * First the guest code:
9623 */
9624 for (unsigned i = 0; i < pTb->cRanges; i++)
9625 {
9626 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9627 + (pTb->aRanges[i].idxPhysPage == 0
9628 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9629 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9630 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9631 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9632 unsigned off = pTb->aRanges[i].offOpcodes;
9633 /** @todo this ain't working when crossing pages! */
9634 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9635 while (off < cbOpcodes)
9636 {
9637 uint32_t cbInstr = 1;
9638 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9639 &pTb->pabOpcodes[off], cbOpcodes - off,
9640 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9641 if (RT_SUCCESS(rc))
9642 {
9643 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9644 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9645 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9646 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9647 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9648 GCPhysPc += cbInstr;
9649 off += cbInstr;
9650 }
9651 else
9652 {
9653 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - disassembly failure %Rrc\n",
9654 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9655 break;
9656 }
9657 }
9658 }
9659
9660 /*
9661 * Then the native code:
9662 */
9663 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9664 while (offNative < cNative)
9665 {
9666 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9667#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9668 uint32_t cbInstr = sizeof(paNative[0]);
9669 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9670 if (RT_SUCCESS(rc))
9671 {
9672# if defined(RT_ARCH_AMD64)
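                /* A 7-byte NOP here is a marker from iemNativeEmitMarker: the 32-bit
                   immediate at byte 3 holds the call index in the low 15 bits, a
                   'recompiled' flag in bit 15 and the threaded function number in the
                   high word (cf. the VBOX_STRICT marker emit in iemNativeRecompile). */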
9673 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9674 {
9675 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9676 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9677 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9678 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9679 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9680 uInfo & 0x8000 ? "recompiled" : "todo");
9681 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9682 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9683 else
9684 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9685 }
9686 else
9687# endif
9688 {
9689# ifdef RT_ARCH_AMD64
9690 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9691 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9692 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9693 iemNativeDisasmGetSymbolCb, &SymCtx);
9694# elif defined(RT_ARCH_ARM64)
9695 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9696 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9697 iemNativeDisasmGetSymbolCb, &SymCtx);
9698# else
9699# error "Port me"
9700# endif
9701 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9702 }
9703 }
9704 else
9705 {
9706# if defined(RT_ARCH_AMD64)
9707 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9708 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9709# else
9710 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9711# endif
9712 cbInstr = sizeof(paNative[0]);
9713 }
9714 offNative += cbInstr / sizeof(paNative[0]);
9715
9716#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9717 cs_insn *pInstr;
9718 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9719 (uintptr_t)pNativeCur, 1, &pInstr);
9720 if (cInstrs > 0)
9721 {
9722 Assert(cInstrs == 1);
9723 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9724 size_t const cchOp = strlen(pInstr->op_str);
9725# if defined(RT_ARCH_AMD64)
9726 if (pszAnnotation)
9727 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9728 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9729 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9730 else
9731 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9732 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9733
9734# else
9735 if (pszAnnotation)
9736 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9737 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9738 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9739 else
9740 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9741 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9742# endif
9743 offNative += pInstr->size / sizeof(*pNativeCur);
9744 cs_free(pInstr, cInstrs);
9745 }
9746 else
9747 {
9748# if defined(RT_ARCH_AMD64)
9749 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9750 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9751# else
9752 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9753# endif
9754 offNative++;
9755 }
9756#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9757 }
9758 }
9759
9760#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9761 /* Cleanup. */
9762 cs_close(&hDisasm);
9763#endif
9764}
9765
9766
9767/** Emit alignment padding between labels / functions. */
9768DECL_INLINE_THROW(uint32_t)
9769iemNativeRecompileEmitAlignmentPadding(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fAlignMask)
9770{
9771 if (off & fAlignMask)
9772 {
9773 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, fAlignMask + 1);
9774 while (off & fAlignMask)
9775#if defined(RT_ARCH_AMD64)
9776 pCodeBuf[off++] = 0xcc;
9777#elif defined(RT_ARCH_ARM64)
9778 pCodeBuf[off++] = Armv8A64MkInstrBrk(0xcccc);
9779#else
9780# error "port me"
9781#endif
9782 }
9783 return off;
9784}
9785
9786
9787/**
9788 * Called when a new chunk is allocated to emit common per-chunk code.
9789 *
9790 * Allocates a per-chunk context directly from the chunk itself and places the
9791 * common code there.
9792 *
9793 * @returns VBox status code.
9794 * @param pVCpu The cross context virtual CPU structure of the calling
9795 * thread.
9796 * @param idxChunk The index of the chunk being added and requiring a
9797 * common code context.
9798 * @param ppCtx Where to return the pointer to the chunk context start.
9799 */
9800DECLHIDDEN(int) iemNativeRecompileAttachExecMemChunkCtx(PVMCPU pVCpu, uint32_t idxChunk, PCIEMNATIVEPERCHUNKCTX *ppCtx)
9801{
9802 *ppCtx = NULL;
9803
9804 /*
9805 * Allocate a new recompiler state (since we're likely to be called while
9806 * the default one is fully loaded already with a recompiled TB).
9807 *
9808 * This is a bit of overkill, but this isn't a frequently used code path.
9809 */
9810 PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, NULL);
9811 AssertReturn(pReNative, VERR_NO_MEMORY);
9812
9813#if defined(RT_ARCH_AMD64)
9814 uint32_t const fAlignMask = 15; /* 16 byte code alignment. */
9815#elif defined(RT_ARCH_ARM64)
9816 uint32_t const fAlignMask = 31 / 4; /* 32 byte code alignment, in units of 4-byte instructions (mask = 7). */
9817#else
9818# error "port me"
9819#endif
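    /* aoffLabels records the offset (in IEMNATIVEINSTR units) of each exit label's
       code within the common code being generated; these are turned into absolute
       RX addresses in pCtx->apExitLabels once the code is copied into the chunk. */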
9820 uint32_t aoffLabels[kIemNativeLabelType_LastTbExit + 1] = {0};
9821 int rc = VINF_SUCCESS;
9822 uint32_t off = 0;
9823
9824 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9825 {
9826 /*
9827 * Emit the epilog code.
9828 */
9829 aoffLabels[kIemNativeLabelType_ReturnSuccess] = off;
9830 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
9831 uint32_t const offReturnWithStatus = off;
9832 off = iemNativeEmitCoreEpilog(pReNative, off);
9833
9834 /*
9835 * Generate special jump labels. All of these get a copy of the epilog code.
9836 */
9837 static struct
9838 {
9839 IEMNATIVELABELTYPE enmExitReason;
9840 uint32_t (*pfnEmitCore)(PIEMRECOMPILERSTATE pReNative, uint32_t off);
9841 } const s_aSpecialWithEpilogs[] =
9842 {
9843 { kIemNativeLabelType_NonZeroRetOrPassUp, iemNativeEmitCoreRcFiddling },
9844 { kIemNativeLabelType_ReturnBreak, iemNativeEmitCoreReturnBreak },
9845 { kIemNativeLabelType_ReturnBreakFF, iemNativeEmitCoreReturnBreakFF },
9846 { kIemNativeLabelType_ReturnWithFlags, iemNativeEmitCoreReturnWithFlags },
9847 };
9848 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSpecialWithEpilogs); i++)
9849 {
9850 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9851 Assert(aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] == 0);
9852 aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] = off;
9853 off = s_aSpecialWithEpilogs[i].pfnEmitCore(pReNative, off);
9854 off = iemNativeEmitCoreEpilog(pReNative, off);
9855 }
9856
9857 /*
9858 * Do what iemNativeEmitReturnBreakViaLookup does.
9859 */
9860 static struct
9861 {
9862 IEMNATIVELABELTYPE enmExitReason;
9863 uintptr_t pfnHelper;
9864 } const s_aViaLookup[] =
9865 {
9866 { kIemNativeLabelType_ReturnBreakViaLookup,
9867 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/> },
9868 { kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
9869 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/> },
9870 { kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
9871 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/> },
9872 { kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
9873 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/> },
9874 };
9875 uint32_t const offReturnBreak = aoffLabels[kIemNativeLabelType_ReturnBreak]; Assert(offReturnBreak != 0);
9876 for (uint32_t i = 0; i < RT_ELEMENTS(s_aViaLookup); i++)
9877 {
9878 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9879 Assert(aoffLabels[s_aViaLookup[i].enmExitReason] == 0);
9880 aoffLabels[s_aViaLookup[i].enmExitReason] = off;
9881 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, s_aViaLookup[i].pfnHelper);
9882 }
9883
9884 /*
9885 * Generate simple TB tail labels that just call a helper with a pVCpu
9886 * argument and either return or longjmp/throw a non-zero status.
9887 */
9888 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9889 static struct
9890 {
9891 IEMNATIVELABELTYPE enmExitReason;
9892 bool fWithEpilog;
9893 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9894 } const s_aSimpleTailLabels[] =
9895 {
9896 { kIemNativeLabelType_RaiseDe, false, iemNativeHlpExecRaiseDe },
9897 { kIemNativeLabelType_RaiseUd, false, iemNativeHlpExecRaiseUd },
9898 { kIemNativeLabelType_RaiseSseRelated, false, iemNativeHlpExecRaiseSseRelated },
9899 { kIemNativeLabelType_RaiseAvxRelated, false, iemNativeHlpExecRaiseAvxRelated },
9900 { kIemNativeLabelType_RaiseSseAvxFpRelated, false, iemNativeHlpExecRaiseSseAvxFpRelated },
9901 { kIemNativeLabelType_RaiseNm, false, iemNativeHlpExecRaiseNm },
9902 { kIemNativeLabelType_RaiseGp0, false, iemNativeHlpExecRaiseGp0 },
9903 { kIemNativeLabelType_RaiseMf, false, iemNativeHlpExecRaiseMf },
9904 { kIemNativeLabelType_RaiseXf, false, iemNativeHlpExecRaiseXf },
9905 { kIemNativeLabelType_ObsoleteTb, true, iemNativeHlpObsoleteTb },
9906 { kIemNativeLabelType_NeedCsLimChecking, true, iemNativeHlpNeedCsLimChecking },
9907 { kIemNativeLabelType_CheckBranchMiss, true, iemNativeHlpCheckBranchMiss },
9908 };
9909 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSimpleTailLabels); i++)
9910 {
9911 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9912 Assert(!aoffLabels[s_aSimpleTailLabels[i].enmExitReason]);
9913 aoffLabels[s_aSimpleTailLabels[i].enmExitReason] = off;
9914
9915 /* int pfnCallback(PVMCPUCC pVCpu) */
9916 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9917 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)s_aSimpleTailLabels[i].pfnCallback);
9918
9919 /* If the callback is supposed to return with a status code we inline the epilog
9920 sequence for better speed. Otherwise, if the callback shouldn't return because
9921 it throws/longjmps, we just jump to the return sequence to be on the safe side. */
9922 if (s_aSimpleTailLabels[i].fWithEpilog)
9923 off = iemNativeEmitCoreEpilog(pReNative, off);
9924 else
9925 {
9926#ifdef VBOX_STRICT
9927 off = iemNativeEmitBrk(pReNative, off, 0x2201);
9928#endif
9929 off = iemNativeEmitJmpToFixed(pReNative, off, offReturnWithStatus);
9930 }
9931 }
9932
9933
9934#ifdef VBOX_STRICT
9935 /* Make sure we've generated code for all labels. */
9936 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(aoffLabels); i++)
9937 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
9938#endif
9939 }
9940 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9941 {
9942 Log(("iemNativeRecompileAttachExecMemChunkCtx: Caught %Rrc while recompiling!\n", rc));
9943 iemNativeTerm(pReNative);
9944 return rc;
9945 }
9946 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9947
9948 /*
9949 * Allocate memory for the context (first) and the common code (last).
9950 */
9951 PIEMNATIVEPERCHUNKCTX pCtx;
9952 uint32_t const cbCtx = RT_ALIGN_32(sizeof(*pCtx), 64);
9953 uint32_t const cbCode = off * sizeof(IEMNATIVEINSTR);
9954 PIEMNATIVEINSTR paFinalCommonCodeRx = NULL;
9955 pCtx = (PIEMNATIVEPERCHUNKCTX)iemExecMemAllocatorAllocFromChunk(pVCpu, idxChunk, cbCtx + cbCode, &paFinalCommonCodeRx);
9956 AssertLogRelMsgReturnStmt(pCtx, ("cbCtx=%#x cbCode=%#x idxChunk=%#x\n", cbCtx, cbCode, idxChunk),
9957 iemNativeTerm(pReNative), VERR_OUT_OF_RESOURCES);
9958
9959 /*
9960 * Copy over the generated code.
9961 * There should be no fixups or labels defined here.
9962 */
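    /* Two views of the same chunk memory: pCtx is the writable address used for the
       memcpy below, while paFinalCommonCodeRx is the executable address that the exit
       labels must point at (the two may coincide on hosts without a W^X split). */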
9963 paFinalCommonCodeRx = (PIEMNATIVEINSTR)((uintptr_t)paFinalCommonCodeRx + cbCtx);
9964 memcpy((PIEMNATIVEINSTR)((uintptr_t)pCtx + cbCtx), pReNative->pInstrBuf, cbCode);
9965
9966 Assert(pReNative->cFixups == 0);
9967 Assert(pReNative->cLabels == 0);
9968
9969 /*
9970 * Initialize the context.
9971 */
9972 AssertCompile(kIemNativeLabelType_Invalid == 0);
9973 AssertCompile(RT_ELEMENTS(pCtx->apExitLabels) == RT_ELEMENTS(aoffLabels));
9974 pCtx->apExitLabels[kIemNativeLabelType_Invalid] = 0;
9975 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(pCtx->apExitLabels); i++)
9976 {
9977 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
9978 pCtx->apExitLabels[i] = &paFinalCommonCodeRx[aoffLabels[i]];
9979 Log10((" apExitLabels[%u]=%p %s\n", i, pCtx->apExitLabels[i], iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true)));
9980 }
9981
9982 iemExecMemAllocatorReadyForUse(pVCpu, pCtx, cbCtx + cbCode);
9983
9984 iemNativeTerm(pReNative);
9985 *ppCtx = pCtx;
9986 return VINF_SUCCESS;
9987}
9988
9989
9990/**
9991 * Recompiles the given threaded TB into a native one.
9992 *
9993 * In case of failure the translation block will be returned as-is.
9994 *
9995 * @returns pTb.
9996 * @param pVCpu The cross context virtual CPU structure of the calling
9997 * thread.
9998 * @param pTb The threaded translation to recompile to native.
9999 */
10000DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
10001{
10002#if 0 /* For profiling the native recompiler code. */
10003l_profile_again:
10004#endif
10005 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
10006
10007 /*
10008 * The first time thru, we allocate the recompiler state and save it;
10009 * all the other times we'll just reuse the saved one after a quick reset.
10010 */
10011 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
10012 if (RT_LIKELY(pReNative))
10013 iemNativeReInit(pReNative, pTb);
10014 else
10015 {
10016 pReNative = iemNativeInit(pVCpu, pTb);
10017 AssertReturn(pReNative, pTb);
10018 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative; /* save it */
10019 }
10020
10021#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10022 /*
10023 * First do liveness analysis. This is done backwards.
10024 */
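        /* Each entry is derived from the one following it, so once the calls are
           recompiled front to back, a call's liveness entry describes which guest
           registers and EFLAGS the calls after it still need. */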
10025 {
10026 uint32_t idxCall = pTb->Thrd.cCalls;
10027 if (idxCall <= pReNative->cLivenessEntriesAlloc)
10028 { /* likely */ }
10029 else
10030 {
10031 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
10032 while (idxCall > cAlloc)
10033 cAlloc *= 2;
10034 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
10035 AssertReturn(pvNew, pTb);
10036 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
10037 pReNative->cLivenessEntriesAlloc = cAlloc;
10038 }
10039 AssertReturn(idxCall > 0, pTb);
10040 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
10041
10042 /* The initial (final) entry. */
10043 idxCall--;
10044 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
10045
10046 /* Loop backwards thru the calls and fill in the other entries. */
10047 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
10048 while (idxCall > 0)
10049 {
10050 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
10051 Assert(pfnLiveness);
10052 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
10053 pCallEntry--;
10054 idxCall--;
10055 }
10056 }
10057#endif
10058
10059 /*
10060 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10061 * for aborting if an error happens.
10062 */
10063 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10064#ifdef LOG_ENABLED
10065 uint32_t const cCallsOrg = cCallsLeft;
10066#endif
10067 uint32_t off = 0;
10068 int rc = VINF_SUCCESS;
10069 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10070 {
10071 /*
10072 * Convert the calls to native code.
10073 */
10074#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10075 int32_t iGstInstr = -1;
10076#endif
10077#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10078 uint32_t cThreadedCalls = 0;
10079 uint32_t cRecompiledCalls = 0;
10080#endif
10081#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10082 uint32_t idxCurCall = 0;
10083#endif
10084 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10085 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10086 while (cCallsLeft-- > 0)
10087 {
10088 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10089#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10090 pReNative->idxCurCall = idxCurCall;
10091#endif
10092
10093#ifdef IEM_WITH_INTRA_TB_JUMPS
10094 /*
10095 * Define a label for jump targets (currently only the first entry).
10096 */
10097 if (!(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET))
10098 { /* likely */ }
10099 else
10100 {
10101 iemNativeLabelCreate(pReNative, kIemNativeLabelType_LoopJumpTarget, off);
10102 Assert(idxCurCall == 0); /** @todo when jumping elsewhere, we have to save the register state. */
10103 }
10104#endif
10105
10106 /*
10107 * Debug info, assembly markup and statistics.
10108 */
10109#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10110 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10111 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10112#endif
10113#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10114 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10115 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10116 {
10117 if (iGstInstr < (int32_t)pTb->cInstructions)
10118 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10119 else
10120 Assert(iGstInstr == pTb->cInstructions);
10121 iGstInstr = pCallEntry->idxInstr;
10122 }
10123 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10124#endif
10125#if defined(VBOX_STRICT)
10126 off = iemNativeEmitMarker(pReNative, off,
10127 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
10128#endif
10129#if defined(VBOX_STRICT)
10130 iemNativeRegAssertSanity(pReNative);
10131#endif
10132#ifdef VBOX_WITH_STATISTICS
10133 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10134#endif
10135
10136#if 0
10137 if ( pTb->GCPhysPc == 0x00000000000c1240
10138 && idxCurCall == 67)
10139 off = iemNativeEmitBrk(pReNative, off, 0xf000);
10140#endif
10141
10142 /*
10143 * Actual work.
10144 */
10145 Log2(("%u[%u]: %s%s (off=%#x)\n", idxCurCall, pCallEntry->idxInstr,
10146 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)", off));
10147 if (pfnRecom) /** @todo stats on this. */
10148 {
10149 off = pfnRecom(pReNative, off, pCallEntry);
10150 STAM_REL_STATS({cRecompiledCalls++;});
10151 }
10152 else
10153 {
10154 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10155 STAM_REL_STATS({cThreadedCalls++;});
10156 }
10157 Assert(off <= pReNative->cInstrBufAlloc);
10158 Assert(pReNative->cCondDepth == 0);
10159
10160#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10161 if (LogIs2Enabled())
10162 {
10163 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10164# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10165 static const char s_achState[] = "CUXI";
10166# else
10167 /* 0123 4567 89ab cdef */
10168 /* CCCC CCCC */
10169 /* WWWW WWWW */
10170 /* RR RR RR RR */
10171 /* P P P P P P P P */
10172 static const char s_achState[] = "UxRr" "WwMm" "CcQq" "KkNn";
10173# endif
10174
10175 char szGpr[17];
10176 for (unsigned i = 0; i < 16; i++)
10177 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10178 szGpr[16] = '\0';
10179
10180 char szSegBase[X86_SREG_COUNT + 1];
10181 char szSegLimit[X86_SREG_COUNT + 1];
10182 char szSegAttrib[X86_SREG_COUNT + 1];
10183 char szSegSel[X86_SREG_COUNT + 1];
10184 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10185 {
10186 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10187 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10188 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10189 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10190 }
10191 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10192 = szSegSel[X86_SREG_COUNT] = '\0';
10193
10194 char szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT + 1];
10195 for (unsigned i = 0; i < IEMLIVENESSBIT_IDX_EFL_COUNT; i++)
10196 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10197 szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT] = '\0';
10198
10199 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10200 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10201 }
10202#endif
10203
10204 /*
10205 * Advance.
10206 */
10207 pCallEntry++;
10208#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10209 idxCurCall++;
10210#endif
10211 }
10212
10213 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10214 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10215 if (!cThreadedCalls)
10216 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10217
10218 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, UINT32_MAX);
10219
10220#ifdef VBOX_WITH_STATISTICS
10221 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
10222#endif
10223
10224 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
10225 off = iemNativeRegFlushPendingWrites(pReNative, off);
10226
10227 /*
10228 * Jump to the common per-chunk epilog code.
10229 */
10230 //off = iemNativeEmitBrk(pReNative, off, 0x1227);
10231 off = iemNativeEmitTbExit<kIemNativeLabelType_ReturnSuccess, true, false>(pReNative, off);
10232
10233 /*
10234 * Generate tail labels with jumps to the common per-chunk code on non-x86 hosts.
10235 */
10236#ifndef RT_ARCH_AMD64
10237 Assert(!(pReNative->bmLabelTypes & ( RT_BIT_64(kIemNativeLabelType_ReturnSuccess)
10238 | RT_BIT_64(kIemNativeLabelType_Invalid) )));
10239 AssertCompile(kIemNativeLabelType_Invalid == 0);
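    /* Tail label mask = bits 1..LastTbExit: the '- 2U' (rather than '- 1U') clears
       bit 0 (Invalid), and the RT_BIT_64(kIemNativeLabelType_LastTbExit + 1U) bound
       drops everything above the TB-exit label types. */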
10240 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1U) - 2U);
10241 if (fTailLabels)
10242 {
10243 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, kIemNativeLabelType_LastTbExit + 1);
10244 do
10245 {
10246 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10247 fTailLabels &= ~RT_BIT_64(enmLabel);
10248
10249 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10250 AssertContinue(idxLabel != UINT32_MAX);
10251 iemNativeLabelDefine(pReNative, idxLabel, off);
10252
10253 iemNativeAddTbExitFixup(pReNative, off, enmLabel);
10254# ifdef RT_ARCH_ARM64
10255 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
10256# else
10257# error "port me"
10258# endif
10259 } while (fTailLabels);
10260 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10261 }
10262#else
10263 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1) - 1U))); /* Should not be used! */
10264#endif
10265 }
10266 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10267 {
10268 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10269 return pTb;
10270 }
10271 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10272 Assert(off <= pReNative->cInstrBufAlloc);
10273
10274 /*
10275 * Make sure all labels have been defined.
10276 */
10277 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10278#ifdef VBOX_STRICT
10279 uint32_t const cLabels = pReNative->cLabels;
10280 for (uint32_t i = 0; i < cLabels; i++)
10281 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10282#endif
10283
10284#if 0 /* For profiling the native recompiler code. */
10285 if (pTb->Thrd.cCalls >= 136)
10286 {
10287 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10288 goto l_profile_again;
10289 }
10290#endif
10291
10292 /*
10293 * Allocate executable memory, copy over the code we've generated.
10294 */
10295 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10296 if (pTbAllocator->pDelayedFreeHead)
10297 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10298
10299 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
10300 PCIEMNATIVEPERCHUNKCTX pCtx = NULL;
10301 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10302 &paFinalInstrBufRx, &pCtx);
10303
10304 AssertReturn(paFinalInstrBuf, pTb);
10305 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10306
10307 /*
10308 * Apply fixups.
10309 */
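    /* Fixup offsets index into paFinalInstrBuf, i.e. they are in IEMNATIVEINSTR units
       (bytes on AMD64, 32-bit instruction words on ARM64), matching what the relative
       branch encodings patched below expect. */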
10310 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10311 uint32_t const cFixups = pReNative->cFixups;
10312 for (uint32_t i = 0; i < cFixups; i++)
10313 {
10314 Assert(paFixups[i].off < off);
10315 Assert(paFixups[i].idxLabel < cLabels);
10316 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10317 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10318 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10319 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10320 switch (paFixups[i].enmType)
10321 {
10322#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10323 case kIemNativeFixupType_Rel32:
10324 Assert(paFixups[i].off + 4 <= off);
10325 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10326 continue;
10327
10328#elif defined(RT_ARCH_ARM64)
10329 case kIemNativeFixupType_RelImm26At0: /* B/BL: signed imm26 in instruction units (+/-128 MiB). */
10330 {
10331 Assert(paFixups[i].off < off);
10332 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10333 Assert(offDisp >= -33554432 && offDisp < 33554432);
10334 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10335 continue;
10336 }
10337
10338 case kIemNativeFixupType_RelImm19At5: /* B.cond/CBZ/CBNZ: signed imm19 (+/-1 MiB). */
10339 {
10340 Assert(paFixups[i].off < off);
10341 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10342 Assert(offDisp >= -262144 && offDisp < 262144);
10343 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10344 continue;
10345 }
10346
10347 case kIemNativeFixupType_RelImm14At5: /* TBZ/TBNZ: signed imm14 (+/-32 KiB). */
10348 {
10349 Assert(paFixups[i].off < off);
10350 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10351 Assert(offDisp >= -8192 && offDisp < 8192);
10352 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10353 continue;
10354 }
10355
10356#endif
10357 case kIemNativeFixupType_Invalid:
10358 case kIemNativeFixupType_End:
10359 break;
10360 }
10361 AssertFailed();
10362 }
10363
10364 /*
10365 * Apply TB exit fixups.
10366 */
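    /* These patch the TB exit branches to jump to the common per-chunk code emitted by
       iemNativeRecompileAttachExecMemChunkCtx, using the absolute RX addresses stored
       in pCtx->apExitLabels. */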
10367 PIEMNATIVEEXITFIXUP const paTbExitFixups = pReNative->paTbExitFixups;
10368 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
10369 for (uint32_t i = 0; i < cTbExitFixups; i++)
10370 {
10371 Assert(paTbExitFixups[i].off < off);
10372 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(paTbExitFixups[i].enmExitReason));
10373 RTPTRUNION const Ptr = { &paFinalInstrBuf[paTbExitFixups[i].off] };
10374
10375#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10376 Assert(paTbExitFixups[i].off + 4 <= off);
10377 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off + 4];
10378 Assert(offDisp >= INT32_MIN && offDisp <= INT32_MAX);
10379 *Ptr.pi32 = (int32_t)offDisp;
10380
10381#elif defined(RT_ARCH_ARM64)
10382 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off];
10383 Assert(offDisp >= -33554432 && offDisp < 33554432);
10384 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10385
10386#else
10387# error "Port me!"
10388#endif
10389 }
10390
10391 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
10392 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10393
10394 /*
10395 * Convert the translation block.
10396 */
10397 RTMemFree(pTb->Thrd.paCalls);
10398 pTb->Native.paInstructions = paFinalInstrBufRx;
10399 pTb->Native.cInstructions = off;
10400 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10401#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10402 pReNative->pDbgInfo->FlatPc = pTb->FlatPc;
10403 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10404 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10405#endif
10406
10407 Assert(pTbAllocator->cThreadedTbs > 0);
10408 pTbAllocator->cThreadedTbs -= 1;
10409 pTbAllocator->cNativeTbs += 1;
10410 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10411
10412#ifdef LOG_ENABLED
10413 /*
10414 * Disassemble to the log if enabled.
10415 */
10416 if (LogIs3Enabled())
10417 {
10418 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10419 iemNativeDisassembleTb(pVCpu, pTb, DBGFR3InfoLogHlp());
10420# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10421 RTLogFlush(NULL);
10422# endif
10423 }
10424#endif
10425 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10426
10427 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10428 return pTb;
10429}
10430
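/* Rough usage sketch (illustrative only, not a call site in this file): callers can
   simply keep whatever TB pointer comes back, since on failure the threaded TB is
   returned unchanged and on success its type flag has been switched to native:

       pTb = iemNativeRecompile(pVCpu, pTb);
       if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
           // dispatch via pTb->Native.paInstructions
 */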