VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@106407

Last change on this file since 106407 was 106407, checked in by vboxsync, 7 weeks ago

VMM/IEM: Reduced the number of parameters for most iemNativeVarRegisterAcquire calls. bugref:10720

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 457.6 KB
1/* $Id: IEMAllN8veRecompiler.cpp 106407 2024-10-16 22:30:34Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : Delayed PC updating.
11 * - Level 5 (Log5) : Postponed and skipped EFLAGS calculations.
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include <VBox/vmm/tm.h>
55#include "IEMInternal.h"
56#include <VBox/vmm/vmcc.h>
57#include <VBox/log.h>
58#include <VBox/err.h>
59#include <VBox/dis.h>
60#include <VBox/param.h>
61#include <iprt/assert.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
71# include "/opt/local/include/capstone/capstone.h"
72#endif
73
74#include "IEMInline.h"
75#include "IEMThreadedFunctions.h"
76#include "IEMN8veRecompiler.h"
77#include "IEMN8veRecompilerEmit.h"
78#include "IEMN8veRecompilerTlbLookup.h"
79#include "IEMNativeFunctions.h"
80#include "target-x86/IEMAllN8veEmit-x86.h"
81
82
83/*
84 * Narrow down configs here to avoid wasting time on unused configs.
85 * Note! Same checks in IEMAllThrdRecompiler.cpp.
86 */
87
88#ifndef IEM_WITH_CODE_TLB
89# error The code TLB must be enabled for the recompiler.
90#endif
91
92#ifndef IEM_WITH_DATA_TLB
93# error The data TLB must be enabled for the recompiler.
94#endif
95
96#ifndef IEM_WITH_SETJMP
97# error The setjmp approach must be enabled for the recompiler.
98#endif
99
100/** @todo eliminate this clang build hack. */
101#if RT_CLANG_PREREQ(4, 0)
102# pragma GCC diagnostic ignored "-Wunused-function"
103#endif
104
105
106/*********************************************************************************************************************************
107* Internal Functions *
108*********************************************************************************************************************************/
109#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
110static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
111#endif
112DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
113DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
114 IEMNATIVEGSTREG enmGstReg, uint32_t off);
115DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
116static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode = false);
117
118
119
120/*********************************************************************************************************************************
121* Native Recompilation *
122*********************************************************************************************************************************/
123
124
125/**
126 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
127 */
128IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
129{
130 pVCpu->iem.s.cInstructions += idxInstr;
131 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
132}
133
134
135/**
136 * Helper for iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb.
137 */
138DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
139{
140 uint64_t fCpu = pVCpu->fLocalForcedActions;
141 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
142 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
143 | VMCPU_FF_TLB_FLUSH
144 | VMCPU_FF_UNHALT );
145 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
146 if (RT_LIKELY( ( !fCpu
147 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
148 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
149 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
150 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
151 return false;
152 return true;
153}
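/*
 * Note added for clarity (not part of the original source): the force-action
 * flags masked out above (CR3 sync, TLB flush, unhalt) are treated as not
 * requiring an exit at this point.  Anything else left in the mask, an
 * APIC/PIC interrupt that can actually be delivered (IF set and no interrupt
 * shadow), or any VM-wide force flag makes this predicate return true.
 */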
154
155
156/**
157 * Used by TB code to look up the next TB via the lookup table and jump straight to it when everything matches, optionally checking for pending IRQs and force flags first (a_fWithIrqCheck).
158 */
159template<bool const a_fWithIrqCheck>
160IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
161 uint32_t fFlags, RTGCPHYS GCPhysPc))
162{
163 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
164 Assert(idxTbLookup < pTb->cTbLookupEntries);
165 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
166#if 1
167 PIEMTB const pNewTb = *ppNewTb;
168 if (pNewTb)
169 {
170# ifdef VBOX_STRICT
171 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
172 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
173 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
174 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
175 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
176 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
177# endif
178 if (pNewTb->GCPhysPc == GCPhysPc)
179 {
180# ifdef VBOX_STRICT
181 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
182 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
183 fAssertFlags |= IEMTB_F_INHIBIT_SHADOW;
184 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
185 fAssertFlags |= IEMTB_F_INHIBIT_NMI;
186# if 1 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. */
187 Assert(IEM_F_MODE_X86_IS_FLAT(fFlags));
188# else
189 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
190 {
191 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
192 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
193 fAssertFlags |= IEMTB_F_CS_LIM_CHECKS;
194 }
195# endif
196 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
197 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
198 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
199#endif
200
201 /*
202 * Check them + type.
203 */
204 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
205 {
206 /*
207 * Check for interrupts and stuff.
208 */
209 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
210 * The main problems are the statistics and to some degree the logging. :/ */
211 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
212 {
213 /* Do polling. */
214 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
215 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
216 {
217 /*
218 * Success. Update statistics and switch to the next TB.
219 */
220 if (a_fWithIrqCheck)
221 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
222 else
223 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
224
225 pNewTb->cUsed += 1;
226 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
227 pVCpu->iem.s.pCurTbR3 = pNewTb;
228 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
229 pVCpu->iem.s.cTbExecNative += 1;
230 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
231 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
232 return (uintptr_t)pNewTb->Native.paInstructions;
233 }
234 }
235 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
236 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
237 }
238 else
239 {
240 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
241 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
242 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
243 }
244 }
245 else
246 {
247 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
248 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
249 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
250 }
251 }
252 else
253 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
254#else
255 NOREF(GCPhysPc);
256#endif
257
258 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
259 return 0;
260}
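/*
 * Illustrative sketch (not part of the original source): the template above is
 * meant to be instantiated once per IRQ-check flavour.  A hypothetical table of
 * the two instantiations could look like this (the table name is made up):
 */
#if 0
static uintptr_t (* const g_apfnRetBreakViaLookup[2])(PVMCPUCC, uint8_t, uint32_t, RTGCPHYS) =
{
    iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/>,
    iemNativeHlpReturnBreakViaLookup<true  /*a_fWithIrqCheck*/>,
};
#endif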
261
262
263/**
264 * Used by TB code to look up the next TB via the lookup table, doing its own flags calculation and code TLB lookup for the current PC, and jump straight to it when everything matches (a_fWithIrqCheck enables the IRQ/FF check).
265 */
266template <bool const a_fWithIrqCheck>
267IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
268{
269 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
270 Assert(idxTbLookup < pTb->cTbLookupEntries);
271 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
272#if 1
273 PIEMTB const pNewTb = *ppNewTb;
274 if (pNewTb)
275 {
276 /*
277 * Calculate the flags for the next TB and check if they match.
278 */
279 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
280 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
281 { /* likely */ }
282 else
283 {
284 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
285 fFlags |= IEMTB_F_INHIBIT_SHADOW;
286 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
287 fFlags |= IEMTB_F_INHIBIT_NMI;
288 }
289 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
290 {
291 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
292 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
293 { /* likely */ }
294 else
295 fFlags |= IEMTB_F_CS_LIM_CHECKS;
296 }
297 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
298
299 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
300 {
301 /*
302 * Do the TLB lookup for flat RIP and compare the result with the next TB.
303 *
304 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
305 */
306 /* Calc the effective PC. */
307 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
308 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
309 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
310
311 /* Advance within the current buffer (PAGE) when possible. */
312 RTGCPHYS GCPhysPc;
313 uint64_t off;
314 if ( pVCpu->iem.s.pbInstrBuf
315 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
316 {
317 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
318 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
319 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
320 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
321 else
322 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
323 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
324 }
325 else
326 {
327 pVCpu->iem.s.pbInstrBuf = NULL;
328 pVCpu->iem.s.offCurInstrStart = 0;
329 pVCpu->iem.s.offInstrNextByte = 0;
330 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
331 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
332 }
333
334 if (pNewTb->GCPhysPc == GCPhysPc)
335 {
336 /*
337 * Check for interrupts and stuff.
338 */
339 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
340 * The main problems are the statistics and to some degree the logging. :/ */
341 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
342 {
343 /* Do polling. */
344 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
345 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
346 {
347 /*
348 * Success. Update statistics and switch to the next TB.
349 */
350 if (a_fWithIrqCheck)
351 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
352 else
353 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
354
355 pNewTb->cUsed += 1;
356 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
357 pVCpu->iem.s.pCurTbR3 = pNewTb;
358 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
359 pVCpu->iem.s.cTbExecNative += 1;
360 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
361 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
362 return (uintptr_t)pNewTb->Native.paInstructions;
363 }
364 }
365 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
366 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
367 }
368 else
369 {
370 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
371 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
372 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
373 }
374 }
375 else
376 {
377 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
378 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
379 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
380 }
381 }
382 else
383 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
384#else
385 NOREF(fFlags);
386 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
387#endif
388
389 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
390 return 0;
391}
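/*
 * Illustrative sketch (not part of the original source): the instruction-buffer
 * window test used above, with made-up numbers.  While the flat PC stays inside
 * the cached buffer, GCPhysPc is derived by simple offsetting; otherwise
 * iemOpcodeFetchBytesJmp is called to refresh the buffer.
 */
#if 0
uint64_t const uInstrBufPc     = UINT64_C(0x00401000);   /* flat PC the buffer starts at */
uint64_t const cbInstrBufTotal = 0x1000;                 /* one guest page cached */
uint64_t const uPc             = UINT64_C(0x00401ffe);
uint64_t const off             = uPc - uInstrBufPc;      /* 0xffe: still inside the buffer */
bool     const fHit            = off < cbInstrBufTotal;  /* true: no refetch needed */
#endif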
392
393
394/**
395 * Used by TB code when it wants to raise a \#DE.
396 */
397IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
398{
399 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
400 iemRaiseDivideErrorJmp(pVCpu);
401#ifndef _MSC_VER
402 return VINF_IEM_RAISED_XCPT; /* not reached */
403#endif
404}
405
406
407/**
408 * Used by TB code when it wants to raise a \#UD.
409 */
410IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
411{
412 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
413 iemRaiseUndefinedOpcodeJmp(pVCpu);
414#ifndef _MSC_VER
415 return VINF_IEM_RAISED_XCPT; /* not reached */
416#endif
417}
418
419
420/**
421 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
422 *
423 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
424 */
425IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
426{
427 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
428 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
429 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
430 iemRaiseUndefinedOpcodeJmp(pVCpu);
431 else
432 iemRaiseDeviceNotAvailableJmp(pVCpu);
433#ifndef _MSC_VER
434 return VINF_IEM_RAISED_XCPT; /* not reached */
435#endif
436}
437
438
439/**
440 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
441 *
442 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
443 */
444IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
445{
446 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
447 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
448 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
449 iemRaiseUndefinedOpcodeJmp(pVCpu);
450 else
451 iemRaiseDeviceNotAvailableJmp(pVCpu);
452#ifndef _MSC_VER
453 return VINF_IEM_RAISED_XCPT; /* not reached */
454#endif
455}
456
457
458/**
459 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
460 *
461 * See IEM_MC_CALL_AVX_XXX/IEM_MC_CALL_SSE_XXX.
462 */
463IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
464{
465 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
466 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
467 iemRaiseSimdFpExceptionJmp(pVCpu);
468 else
469 iemRaiseUndefinedOpcodeJmp(pVCpu);
470#ifndef _MSC_VER
471 return VINF_IEM_RAISED_XCPT; /* not reached */
472#endif
473}
474
475
476/**
477 * Used by TB code when it wants to raise a \#NM.
478 */
479IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
480{
481 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
482 iemRaiseDeviceNotAvailableJmp(pVCpu);
483#ifndef _MSC_VER
484 return VINF_IEM_RAISED_XCPT; /* not reached */
485#endif
486}
487
488
489/**
490 * Used by TB code when it wants to raise a \#GP(0).
491 */
492IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
493{
494 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
495 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
496#ifndef _MSC_VER
497 return VINF_IEM_RAISED_XCPT; /* not reached */
498#endif
499}
500
501
502/**
503 * Used by TB code when it wants to raise a \#MF.
504 */
505IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
506{
507 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
508 iemRaiseMathFaultJmp(pVCpu);
509#ifndef _MSC_VER
510 return VINF_IEM_RAISED_XCPT; /* not reached */
511#endif
512}
513
514
515/**
516 * Used by TB code when it wants to raise a \#XF.
517 */
518IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
519{
520 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
521 iemRaiseSimdFpExceptionJmp(pVCpu);
522#ifndef _MSC_VER
523 return VINF_IEM_RAISED_XCPT; /* not reached */
524#endif
525}
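/*
 * Note added for clarity (not part of the original source): the iemRaiseXxxJmp
 * calls in the helpers above do not return (with IEM_WITH_SETJMP they longjmp
 * back to the execution loop).  The #ifndef _MSC_VER returns merely keep other
 * compilers from warning about a missing return value, while MSC would flag
 * them as unreachable code.
 */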
526
527
528/**
529 * Used by TB code when detecting opcode changes.
530 * @see iemThreadeFuncWorkerObsoleteTb
531 */
532IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
533{
534 /* We set fSafeToFree to false because we're being called in the context
535 of a TB callback function, which for native TBs means we cannot release
536 the executable memory until we've returned all the way back to iemTbExec,
537 as that return path goes via the native code generated for the TB. */
538 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
539 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
540 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
541 return VINF_IEM_REEXEC_BREAK;
542}
543
544
545/**
546 * Used by TB code when we need to switch to a TB with CS.LIM checking.
547 */
548IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
549{
550 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
551 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
552 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
553 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
554 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
555 return VINF_IEM_REEXEC_BREAK;
556}
557
558
559/**
560 * Used by TB code when we missed a PC check after a branch.
561 */
562IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
563{
564 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
565 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
566 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
567 pVCpu->iem.s.pbInstrBuf));
568 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
569 return VINF_IEM_REEXEC_BREAK;
570}
571
572
573
574/*********************************************************************************************************************************
575* Helpers: Segmented memory fetches and stores. *
576*********************************************************************************************************************************/
577
578/**
579 * Used by TB code to load unsigned 8-bit data w/ segmentation.
580 */
581IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
582{
583#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
584 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
585#else
586 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
587#endif
588}
589
590
591/**
592 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
593 * to 16 bits.
594 */
595IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
596{
597#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
598 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
599#else
600 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
601#endif
602}
603
604
605/**
606 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
607 * to 32 bits.
608 */
609IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
610{
611#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
612 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
613#else
614 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
615#endif
616}
617
618/**
619 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
620 * to 64 bits.
621 */
622IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
623{
624#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
625 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
626#else
627 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
628#endif
629}
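/*
 * Illustrative sketch (not part of the original source): the nested casts in
 * the _Sx_ helpers above first sign-extend within the target width and then
 * zero-extend to 64 bits for the return register.  For an input byte of 0x80:
 */
#if 0
uint8_t  const u8Src   = 0x80;                                        /* -128 when taken as signed */
uint64_t const u64To16 = (uint64_t)(uint16_t)(int16_t)(int8_t)u8Src;  /* 0x000000000000FF80 */
uint64_t const u64To32 = (uint64_t)(uint32_t)(int32_t)(int8_t)u8Src;  /* 0x00000000FFFFFF80 */
uint64_t const u64To64 = (uint64_t)(int64_t)(int8_t)u8Src;            /* 0xFFFFFFFFFFFFFF80 */
#endif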
630
631
632/**
633 * Used by TB code to load unsigned 16-bit data w/ segmentation.
634 */
635IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
636{
637#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
638 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
639#else
640 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
641#endif
642}
643
644
645/**
646 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
647 * to 32 bits.
648 */
649IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
650{
651#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
652 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
653#else
654 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
655#endif
656}
657
658
659/**
660 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
661 * to 64 bits.
662 */
663IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
664{
665#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
666 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
667#else
668 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
669#endif
670}
671
672
673/**
674 * Used by TB code to load unsigned 32-bit data w/ segmentation.
675 */
676IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
677{
678#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
679 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
680#else
681 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
682#endif
683}
684
685
686/**
687 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
688 * to 64 bits.
689 */
690IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
691{
692#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
693 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
694#else
695 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
696#endif
697}
698
699
700/**
701 * Used by TB code to load unsigned 64-bit data w/ segmentation.
702 */
703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
704{
705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
706 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
707#else
708 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
709#endif
710}
711
712
713#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
714/**
715 * Used by TB code to load 128-bit data w/ segmentation.
716 */
717IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
718{
719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
720 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
721#else
722 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
723#endif
724}
725
726
727/**
728 * Used by TB code to load 128-bit data w/ segmentation.
729 */
730IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
731{
732#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
733 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
734#else
735 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
736#endif
737}
738
739
740/**
741 * Used by TB code to load 128-bit data w/ segmentation.
742 */
743IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
744{
745#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
746 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
747#else
748 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
749#endif
750}
751
752
753/**
754 * Used by TB code to load 256-bit data w/ segmentation.
755 */
756IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
757{
758#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
759 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
760#else
761 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
762#endif
763}
764
765
766/**
767 * Used by TB code to load 256-bit data w/ segmentation.
768 */
769IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
770{
771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
772 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
773#else
774 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
775#endif
776}
777#endif
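/*
 * Illustrative sketch (not part of the original source): the 128/256-bit fetch
 * helpers above return their result through a pointer because the value does
 * not fit in a native return register.  A hypothetical caller, assuming pVCpu
 * and GCPtrMem are in scope:
 */
#if 0
RTUINT128U uValue;
iemNativeHlpMemFetchDataU128NoAc(pVCpu, GCPtrMem, X86_SREG_DS, &uValue);
#endif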
778
779
780/**
781 * Used by TB code to store unsigned 8-bit data w/ segmentation.
782 */
783IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
784{
785#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
786 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
787#else
788 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
789#endif
790}
791
792
793/**
794 * Used by TB code to store unsigned 16-bit data w/ segmentation.
795 */
796IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
797{
798#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
799 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
800#else
801 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
802#endif
803}
804
805
806/**
807 * Used by TB code to store unsigned 32-bit data w/ segmentation.
808 */
809IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
810{
811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
812 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
813#else
814 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
815#endif
816}
817
818
819/**
820 * Used by TB code to store unsigned 64-bit data w/ segmentation.
821 */
822IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
823{
824#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
825 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
826#else
827 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
828#endif
829}
830
831
832#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
833/**
834 * Used by TB code to store unsigned 128-bit data w/ segmentation.
835 */
836IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
837{
838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
839 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
840#else
841 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
842#endif
843}
844
845
846/**
847 * Used by TB code to store unsigned 128-bit data w/ segmentation.
848 */
849IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
850{
851#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
852 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
853#else
854 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
855#endif
856}
857
858
859/**
860 * Used by TB code to store unsigned 256-bit data w/ segmentation.
861 */
862IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
863{
864#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
865 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
866#else
867 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
868#endif
869}
870
871
872/**
873 * Used by TB code to store unsigned 256-bit data w/ segmentation.
874 */
875IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
876{
877#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
878 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
879#else
880 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
881#endif
882}
883#endif
884
885
886
887/**
888 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
889 */
890IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
891{
892#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
893 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
894#else
895 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
896#endif
897}
898
899
900/**
901 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
902 */
903IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
904{
905#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
906 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
907#else
908 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
909#endif
910}
911
912
913/**
914 * Used by TB code to store a 32-bit selector value onto a generic stack.
915 *
916 * Intel CPUs don't write a whole dword, thus the special function.
917 */
918IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
919{
920#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
921 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
922#else
923 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
924#endif
925}
926
927
928/**
929 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
930 */
931IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
932{
933#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
934 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
935#else
936 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
937#endif
938}
939
940
941/**
942 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
943 */
944IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
945{
946#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
947 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
948#else
949 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
950#endif
951}
952
953
954/**
955 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
956 */
957IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
958{
959#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
960 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
961#else
962 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
963#endif
964}
965
966
967/**
968 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
969 */
970IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
971{
972#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
973 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
974#else
975 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
976#endif
977}
978
979
980
981/*********************************************************************************************************************************
982* Helpers: Flat memory fetches and stores. *
983*********************************************************************************************************************************/
984
985/**
986 * Used by TB code to load unsigned 8-bit data w/ flat address.
987 * @note Zero extending the value to 64-bit to simplify assembly.
988 */
989IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
990{
991#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
992 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
993#else
994 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
995#endif
996}
997
998
999/**
1000 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1001 * to 16 bits.
1002 * @note Zero extending the value to 64-bit to simplify assembly.
1003 */
1004IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1005{
1006#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1007 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1008#else
1009 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1010#endif
1011}
1012
1013
1014/**
1015 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1016 * to 32 bits.
1017 * @note Zero extending the value to 64-bit to simplify assembly.
1018 */
1019IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1020{
1021#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1022 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1023#else
1024 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1025#endif
1026}
1027
1028
1029/**
1030 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1031 * to 64 bits.
1032 */
1033IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1034{
1035#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1036 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1037#else
1038 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1039#endif
1040}
1041
1042
1043/**
1044 * Used by TB code to load unsigned 16-bit data w/ flat address.
1045 * @note Zero extending the value to 64-bit to simplify assembly.
1046 */
1047IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1048{
1049#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1050 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1051#else
1052 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1053#endif
1054}
1055
1056
1057/**
1058 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1059 * to 32 bits.
1060 * @note Zero extending the value to 64-bit to simplify assembly.
1061 */
1062IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1063{
1064#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1065 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1066#else
1067 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1068#endif
1069}
1070
1071
1072/**
1073 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1074 * to 64 bits.
1075 * @note Zero extending the value to 64-bit to simplify assembly.
1076 */
1077IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1078{
1079#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1080 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1081#else
1082 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1083#endif
1084}
1085
1086
1087/**
1088 * Used by TB code to load unsigned 32-bit data w/ flat address.
1089 * @note Zero extending the value to 64-bit to simplify assembly.
1090 */
1091IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1092{
1093#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1094 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1095#else
1096 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1097#endif
1098}
1099
1100
1101/**
1102 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1103 * to 64 bits.
1104 * @note Zero extending the value to 64-bit to simplify assembly.
1105 */
1106IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1107{
1108#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1109 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1110#else
1111 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1112#endif
1113}
1114
1115
1116/**
1117 * Used by TB code to load unsigned 64-bit data w/ flat address.
1118 */
1119IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1120{
1121#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1122 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1123#else
1124 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1125#endif
1126}
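/*
 * Note added for clarity (not part of the original source): in the
 * IEMNATIVE_WITH_TLB_LOOKUP_FETCH configuration the flat helpers above reuse
 * the segmented Safe workers and pass UINT8_MAX as the segment register index
 * to indicate an already linear (flat) address.
 */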
1127
1128
1129#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1130/**
1131 * Used by TB code to load unsigned 128-bit data w/ flat address.
1132 */
1133IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1134{
1135#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1136 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1137#else
1138 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1139#endif
1140}
1141
1142
1143/**
1144 * Used by TB code to load unsigned 128-bit data w/ flat address.
1145 */
1146IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1147{
1148#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1149 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1150#else
1151 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1152#endif
1153}
1154
1155
1156/**
1157 * Used by TB code to load unsigned 128-bit data w/ flat address.
1158 */
1159IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1160{
1161#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1162 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1163#else
1164 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1165#endif
1166}
1167
1168
1169/**
1170 * Used by TB code to load unsigned 256-bit data w/ flat address.
1171 */
1172IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1173{
1174#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1175 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1176#else
1177 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1178#endif
1179}
1180
1181
1182/**
1183 * Used by TB code to load unsigned 256-bit data w/ flat address.
1184 */
1185IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1186{
1187#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1188 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1189#else
1190 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1191#endif
1192}
1193#endif
1194
1195
1196/**
1197 * Used by TB code to store unsigned 8-bit data w/ flat address.
1198 */
1199IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1200{
1201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1202 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1203#else
1204 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1205#endif
1206}
1207
1208
1209/**
1210 * Used by TB code to store unsigned 16-bit data w/ flat address.
1211 */
1212IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1213{
1214#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1215 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1216#else
1217 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1218#endif
1219}
1220
1221
1222/**
1223 * Used by TB code to store unsigned 32-bit data w/ flat address.
1224 */
1225IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1226{
1227#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1228 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1229#else
1230 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1231#endif
1232}
1233
1234
1235/**
1236 * Used by TB code to store unsigned 64-bit data w/ flat address.
1237 */
1238IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1239{
1240#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1241 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1242#else
1243 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1244#endif
1245}
1246
1247
1248#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1249/**
1250 * Used by TB code to store unsigned 128-bit data w/ flat address.
1251 */
1252IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1253{
1254#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1255 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1256#else
1257 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1258#endif
1259}
1260
1261
1262/**
1263 * Used by TB code to store unsigned 128-bit data w/ flat address.
1264 */
1265IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1266{
1267#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1268 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1269#else
1270 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1271#endif
1272}
1273
1274
1275/**
1276 * Used by TB code to store unsigned 256-bit data w/ flat address.
1277 */
1278IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1279{
1280#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1281 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1282#else
1283 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1284#endif
1285}
1286
1287
1288/**
1289 * Used by TB code to store unsigned 256-bit data w/ flat address.
1290 */
1291IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1292{
1293#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1294 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1295#else
1296 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1297#endif
1298}
1299#endif
1300
1301
1302
1303/**
1304 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1305 */
1306IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1307{
1308#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1309 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1310#else
1311 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1312#endif
1313}
1314
1315
1316/**
1317 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1318 */
1319IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1320{
1321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1322 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1323#else
1324 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1325#endif
1326}
1327
1328
1329/**
1330 * Used by TB code to store a segment selector value onto a flat stack.
1331 *
1332 * Intel CPUs don't write a whole dword, thus the special function.
1333 */
1334IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1335{
1336#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1337 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1338#else
1339 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1340#endif
1341}
1342
1343
1344/**
1345 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1346 */
1347IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1348{
1349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1350 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1351#else
1352 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1353#endif
1354}
1355
1356
1357/**
1358 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1359 */
1360IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1361{
1362#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1363 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1364#else
1365 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1366#endif
1367}
1368
1369
1370/**
1371 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1372 */
1373IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1374{
1375#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1376 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1377#else
1378 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1379#endif
1380}
1381
1382
1383/**
1384 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1385 */
1386IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1387{
1388#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1389 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1390#else
1391 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1392#endif
1393}
1394
1395
1396
1397/*********************************************************************************************************************************
1398* Helpers: Segmented memory mapping. *
1399*********************************************************************************************************************************/
1400
1401/**
1402 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1403 * segmentation.
1404 */
1405IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1406 RTGCPTR GCPtrMem, uint8_t iSegReg))
1407{
1408#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1409 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1410#else
1411 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1412#endif
1413}
1414
1415
1416/**
1417 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1418 */
1419IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1420 RTGCPTR GCPtrMem, uint8_t iSegReg))
1421{
1422#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1423 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1424#else
1425 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1426#endif
1427}
1428
1429
1430/**
1431 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1432 */
1433IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1434 RTGCPTR GCPtrMem, uint8_t iSegReg))
1435{
1436#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1437 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1438#else
1439 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1440#endif
1441}
1442
1443
1444/**
1445 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1446 */
1447IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1448 RTGCPTR GCPtrMem, uint8_t iSegReg))
1449{
1450#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1451 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1452#else
1453 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1454#endif
1455}
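/*
 * Illustrative sketch (not part of the original source): the general shape of a
 * mapped read-modify-write access using the helpers above, assuming pVCpu and
 * GCPtrMem are in scope.  The bUnmapInfo cookie is later handed to the matching
 * commit-and-unmap helper, which is declared elsewhere and only hinted at here.
 */
#if 0
uint8_t         bUnmapInfo = 0;
uint8_t * const pbValue    = iemNativeHlpMemMapDataU8Rw(pVCpu, &bUnmapInfo, GCPtrMem, X86_SREG_DS);
*pbValue ^= 0x01;                   /* modify the guest byte in place */
/* ... commit & unmap via the matching helper, passing bUnmapInfo ... */
#endif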
1456
1457
1458/**
1459 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1460 * segmentation.
1461 */
1462IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1463 RTGCPTR GCPtrMem, uint8_t iSegReg))
1464{
1465#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1466 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1467#else
1468 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1469#endif
1470}
1471
1472
1473/**
1474 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1475 */
1476IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1477 RTGCPTR GCPtrMem, uint8_t iSegReg))
1478{
1479#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1480 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1481#else
1482 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1483#endif
1484}
1485
1486
1487/**
1488 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1489 */
1490IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1491 RTGCPTR GCPtrMem, uint8_t iSegReg))
1492{
1493#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1494 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1495#else
1496 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1497#endif
1498}
1499
1500
1501/**
1502 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1503 */
1504IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1505 RTGCPTR GCPtrMem, uint8_t iSegReg))
1506{
1507#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1508 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1509#else
1510 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1511#endif
1512}
1513
1514
1515/**
1516 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1517 * segmentation.
1518 */
1519IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1520 RTGCPTR GCPtrMem, uint8_t iSegReg))
1521{
1522#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1523 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1524#else
1525 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1526#endif
1527}
1528
1529
1530/**
1531 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1532 */
1533IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1534 RTGCPTR GCPtrMem, uint8_t iSegReg))
1535{
1536#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1537 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1538#else
1539 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1540#endif
1541}
1542
1543
1544/**
1545 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1546 */
1547IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1548 RTGCPTR GCPtrMem, uint8_t iSegReg))
1549{
1550#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1551 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1552#else
1553 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1554#endif
1555}
1556
1557
1558/**
1559 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1560 */
1561IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1562 RTGCPTR GCPtrMem, uint8_t iSegReg))
1563{
1564#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1565 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1566#else
1567 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1568#endif
1569}
1570
1571
1572/**
1573 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1574 * segmentation.
1575 */
1576IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1577 RTGCPTR GCPtrMem, uint8_t iSegReg))
1578{
1579#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1580 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1581#else
1582 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1583#endif
1584}
1585
1586
1587/**
1588 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1591 RTGCPTR GCPtrMem, uint8_t iSegReg))
1592{
1593#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1594 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1595#else
1596 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1597#endif
1598}
1599
1600
1601/**
1602 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1603 */
1604IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1605 RTGCPTR GCPtrMem, uint8_t iSegReg))
1606{
1607#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1608 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1609#else
1610 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1611#endif
1612}
1613
1614
1615/**
1616 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1619 RTGCPTR GCPtrMem, uint8_t iSegReg))
1620{
1621#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1622 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1623#else
1624 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1625#endif
1626}
1627
1628
1629/**
1630 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1633 RTGCPTR GCPtrMem, uint8_t iSegReg))
1634{
1635#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1636 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1637#else
1638 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1639#endif
1640}
1641
1642
1643/**
1644 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1645 */
1646IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1647 RTGCPTR GCPtrMem, uint8_t iSegReg))
1648{
1649#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1650 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1651#else
1652 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1653#endif
1654}
1655
1656
1657/**
1658 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1659 * segmentation.
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1662 RTGCPTR GCPtrMem, uint8_t iSegReg))
1663{
1664#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1665 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1666#else
1667 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1668#endif
1669}
1670
1671
1672/**
1673 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1674 */
1675IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1676 RTGCPTR GCPtrMem, uint8_t iSegReg))
1677{
1678#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1679 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1680#else
1681 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1682#endif
1683}
1684
1685
1686/**
1687 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1688 */
1689IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1690 RTGCPTR GCPtrMem, uint8_t iSegReg))
1691{
1692#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1693 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1694#else
1695 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1696#endif
1697}
1698
1699
1700/**
1701 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1702 */
1703IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1704 RTGCPTR GCPtrMem, uint8_t iSegReg))
1705{
1706#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1707 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1708#else
1709 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1710#endif
1711}
1712
1713
1714/*********************************************************************************************************************************
1715* Helpers: Flat memory mapping. *
1716*********************************************************************************************************************************/
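
/*
 * Note: with IEMNATIVE_WITH_TLB_LOOKUP_MAPPED defined, the flat helpers below
 * reuse the segmented "safe" workers and pass UINT8_MAX as the segment
 * register index (i.e. no segment to apply); otherwise they call the
 * dedicated iemMemFlatMapData*Jmp workers directly.
 */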
1717
1718/**
1719 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1720 * address.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1723{
1724#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1725 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1726#else
1727 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1728#endif
1729}
1730
1731
1732/**
1733 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1734 */
1735IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1736{
1737#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1738 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1739#else
1740 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1741#endif
1742}
1743
1744
1745/**
1746 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1747 */
1748IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1749{
1750#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1751 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1752#else
1753 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1754#endif
1755}
1756
1757
1758/**
1759 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1760 */
1761IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1762{
1763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1764 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1765#else
1766 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1767#endif
1768}
1769
1770
1771/**
1772 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1773 * address.
1774 */
1775IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1776{
1777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1778 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1779#else
1780 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1781#endif
1782}
1783
1784
1785/**
1786 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1787 */
1788IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1789{
1790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1791 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1792#else
1793 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1794#endif
1795}
1796
1797
1798/**
1799 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1800 */
1801IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1802{
1803#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1804 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1805#else
1806 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1807#endif
1808}
1809
1810
1811/**
1812 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1813 */
1814IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1815{
1816#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1817 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1818#else
1819 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1820#endif
1821}
1822
1823
1824/**
1825 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1826 * address.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1829{
1830#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1831 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1832#else
1833 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1834#endif
1835}
1836
1837
1838/**
1839 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1840 */
1841IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1842{
1843#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1844 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1845#else
1846 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1847#endif
1848}
1849
1850
1851/**
1852 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1853 */
1854IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1855{
1856#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1857 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1858#else
1859 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1860#endif
1861}
1862
1863
1864/**
1865 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1870 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1871#else
1872 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1873#endif
1874}
1875
1876
1877/**
1878 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1879 * address.
1880 */
1881IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1882{
1883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1884 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1885#else
1886 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1887#endif
1888}
1889
1890
1891/**
1892 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1895{
1896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1897 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1898#else
1899 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1900#endif
1901}
1902
1903
1904/**
1905 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1906 */
1907IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1908{
1909#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1910 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1911#else
1912 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1913#endif
1914}
1915
1916
1917/**
1918 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1919 */
1920IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1921{
1922#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1923 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1924#else
1925 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1926#endif
1927}
1928
1929
1930/**
1931 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1932 */
1933IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1934{
1935#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1936 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1937#else
1938 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1939#endif
1940}
1941
1942
1943/**
1944 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1945 */
1946IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1947{
1948#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1949 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1950#else
1951 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1952#endif
1953}
1954
1955
1956/**
1957 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1958 * address.
1959 */
1960IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1961{
1962#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1963 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1964#else
1965 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1966#endif
1967}
1968
1969
1970/**
1971 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1974{
1975#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1976 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1977#else
1978 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1979#endif
1980}
1981
1982
1983/**
1984 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1985 */
1986IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1987{
1988#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1989 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1990#else
1991 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1992#endif
1993}
1994
1995
1996/**
1997 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1998 */
1999IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2000{
2001#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2002 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2003#else
2004 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2005#endif
2006}
2007
2008
2009/*********************************************************************************************************************************
2010* Helpers: Commit, rollback & unmap *
2011*********************************************************************************************************************************/
2012
2013/**
2014 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2015 */
2016IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2017{
2018 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2019}
2020
2021
2022/**
2023 * Used by TB code to commit and unmap a read-write memory mapping.
2024 */
2025IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2026{
2027 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2028}
2029
2030
2031/**
2032 * Used by TB code to commit and unmap a write-only memory mapping.
2033 */
2034IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2035{
2036 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2037}
2038
2039
2040/**
2041 * Used by TB code to commit and unmap a read-only memory mapping.
2042 */
2043IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2044{
2045 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2046}
2047
2048
2049/**
2050 * Reinitializes the native recompiler state.
2051 *
2052 * Called before starting a new recompile job.
2053 */
2054static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2055{
2056 pReNative->cLabels = 0;
2057 pReNative->bmLabelTypes = 0;
2058 pReNative->cFixups = 0;
2059 pReNative->cTbExitFixups = 0;
2060#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2061 pReNative->pDbgInfo->cEntries = 0;
2062 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2063#endif
2064 pReNative->pTbOrg = pTb;
2065 pReNative->cCondDepth = 0;
2066 pReNative->uCondSeqNo = 0;
2067 pReNative->uCheckIrqSeqNo = 0;
2068 pReNative->uTlbSeqNo = 0;
2069#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
2070 pReNative->fSkippingEFlags = 0;
2071#endif
2072#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2073 pReNative->PostponedEfl.fEFlags = 0;
2074 pReNative->PostponedEfl.enmOp = kIemNativePostponedEflOp_Invalid;
2075 pReNative->PostponedEfl.cOpBits = 0;
2076 pReNative->PostponedEfl.idxReg1 = UINT8_MAX;
2077 pReNative->PostponedEfl.idxReg2 = UINT8_MAX;
2078#endif
2079
2080#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2081 pReNative->Core.offPc = 0;
2082# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(VBOX_WITH_STATISTICS)
2083 pReNative->idxInstrPlusOneOfLastPcUpdate = 0;
2084# endif
2085# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2086 pReNative->Core.fDebugPcInitialized = false;
2087# endif
2088#endif
2089#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2090 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2091#endif
2092 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2093#if IEMNATIVE_HST_GREG_COUNT < 32
2094 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2095#endif
2096 ;
2097 pReNative->Core.bmHstRegsWithGstShadow = 0;
2098 pReNative->Core.bmGstRegShadows = 0;
2099#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2100 pReNative->Core.bmGstRegShadowDirty = 0;
2101#endif
2102 pReNative->Core.bmVars = 0;
2103 pReNative->Core.bmStack = 0;
2104 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2105 pReNative->Core.u64ArgVars = UINT64_MAX;
2106
2107 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 23);
2108 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2110 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2111 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2112 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2113 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2114 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2115 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2116 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2117 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2118 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2119 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2120 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2121 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2122 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2123 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2124 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2125 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2126 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2127 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2128 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2129 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2130 pReNative->aidxUniqueLabels[22] = UINT32_MAX;
2131
2132 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2133
2134 /* Full host register reinit: */
2135 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2136 {
2137 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2138 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2139 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2140 }
2141
2142 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2143 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2144#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2145 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2146#endif
2147#ifdef IEMNATIVE_REG_FIXED_TMP0
2148 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2149#endif
2150#ifdef IEMNATIVE_REG_FIXED_TMP1
2151 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2152#endif
2153#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2154 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2155#endif
2156 );
2157 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2158 {
2159 fRegs &= ~RT_BIT_32(idxReg);
2160 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2161 }
2162
2163 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2164#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2165 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2166#endif
2167#ifdef IEMNATIVE_REG_FIXED_TMP0
2168 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2169#endif
2170#ifdef IEMNATIVE_REG_FIXED_TMP1
2171 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2172#endif
2173#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2174 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2175#endif
2176
2177#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2178 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2179# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2180 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2181# endif
2182 ;
2183 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2184 pReNative->Core.bmGstSimdRegShadows = 0;
2185 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2186 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2187
2188 /* Full host register reinit: */
2189 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2190 {
2191 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2192 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2193 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2194 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2195 }
2196
2197 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2198 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2199 {
2200 fRegs &= ~RT_BIT_32(idxReg);
2201 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2202 }
2203
2204#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2205 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2206#endif
2207
2208#endif
2209
2210 return pReNative;
2211}
2212
2213
2214/**
2215 * Used when done emitting the per-chunk code and for iemNativeInit bailout.
2216 */
2217static void iemNativeTerm(PIEMRECOMPILERSTATE pReNative)
2218{
2219 RTMemFree(pReNative->pInstrBuf);
2220 RTMemFree(pReNative->paLabels);
2221 RTMemFree(pReNative->paFixups);
2222 RTMemFree(pReNative->paTbExitFixups);
2223#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2224 RTMemFree(pReNative->pDbgInfo);
2225#endif
2226 RTMemFree(pReNative);
2227}
2228
2229
2230/**
2231 * Allocates and initializes the native recompiler state.
2232 *
2233 * This is called the first time an EMT wants to recompile something.
2234 *
2235 * @returns Pointer to the new recompiler state.
2236 * @param pVCpu The cross context virtual CPU structure of the calling
2237 * thread.
2238 * @param pTb The TB that's about to be recompiled. When this is NULL,
2239 * the recompiler state is for emitting the common per-chunk
2240 * code from iemNativeRecompileAttachExecMemChunkCtx.
2241 * @thread EMT(pVCpu)
2242 */
2243static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2244{
2245 VMCPU_ASSERT_EMT(pVCpu);
2246
2247 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2248 AssertReturn(pReNative, NULL);
2249
2250 /*
2251 * Try allocate all the buffers and stuff we need.
2252 */
2253 uint32_t const cFactor = pTb ? 1 : 32 /* per-chunk stuff doesn't really need anything but the code buffer */;
2254 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2255 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K / cFactor);
2256 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K / cFactor);
2257 pReNative->paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemAllocZ(sizeof(IEMNATIVEEXITFIXUP) * _8K / cFactor);
2258#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2259 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K / cFactor]));
2260#endif
2261 if (RT_LIKELY( pReNative->pInstrBuf
2262 && pReNative->paLabels
2263 && pReNative->paFixups
2264 && pReNative->paTbExitFixups)
2265#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2266 && pReNative->pDbgInfo
2267#endif
2268 )
2269 {
2270 /*
2271 * Set the buffer & array sizes on success.
2272 */
2273 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2274 pReNative->cLabelsAlloc = _8K / cFactor;
2275 pReNative->cFixupsAlloc = _16K / cFactor;
2276 pReNative->cTbExitFixupsAlloc = _8K / cFactor;
2277#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2278 pReNative->cDbgInfoAlloc = _16K / cFactor;
2279#endif
2280
2281 /* Other constant stuff: */
2282 pReNative->pVCpu = pVCpu;
2283
2284 /*
2285 * Done, just reinit it.
2286 */
2287 return iemNativeReInit(pReNative, pTb);
2288 }
2289
2290 /*
2291 * Failed. Cleanup and return.
2292 */
2293 AssertFailed();
2294 iemNativeTerm(pReNative);
2295 return NULL;
2296}
2297
2298
2299/**
2300 * Creates a label.
2301 *
2302 * If the label does not yet have a defined position,
2303 * call iemNativeLabelDefine() later to set it.
2304 *
2305 * @returns Label ID. Throws VBox status code on failure, so no need to check
2306 * the return value.
2307 * @param pReNative The native recompile state.
2308 * @param enmType The label type.
2309 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2310 * label is not yet defined (default).
2311 * @param uData Data associated with the label. Only applicable to
2312 * certain type of labels. Default is zero.
2313 */
2314DECL_HIDDEN_THROW(uint32_t)
2315iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2316 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2317{
2318 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2319#if defined(RT_ARCH_AMD64)
2320 Assert(enmType >= kIemNativeLabelType_LoopJumpTarget);
2321#endif
2322
2323 /*
2324 * Locate existing label definition.
2325 *
2326 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2327 * and uData is zero.
2328 */
2329 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2330 uint32_t const cLabels = pReNative->cLabels;
2331 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2332#ifndef VBOX_STRICT
2333 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2334 && offWhere == UINT32_MAX
2335 && uData == 0
2336#endif
2337 )
2338 {
2339#ifndef VBOX_STRICT
2340 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2341 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2342 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2343 if (idxLabel < pReNative->cLabels)
2344 return idxLabel;
2345#else
2346 for (uint32_t i = 0; i < cLabels; i++)
2347 if ( paLabels[i].enmType == enmType
2348 && paLabels[i].uData == uData)
2349 {
2350 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2351 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2352 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2353 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2354 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2355 return i;
2356 }
2357 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2358 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2359#endif
2360 }
2361
2362 /*
2363 * Make sure we've got room for another label.
2364 */
2365 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2366 { /* likely */ }
2367 else
2368 {
2369 uint32_t cNew = pReNative->cLabelsAlloc;
2370 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2371 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2372 cNew *= 2;
2373 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
2374 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2375 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2376 pReNative->paLabels = paLabels;
2377 pReNative->cLabelsAlloc = cNew;
2378 }
2379
2380 /*
2381 * Define a new label.
2382 */
2383 paLabels[cLabels].off = offWhere;
2384 paLabels[cLabels].enmType = enmType;
2385 paLabels[cLabels].uData = uData;
2386 pReNative->cLabels = cLabels + 1;
2387
2388 Assert((unsigned)enmType < 64);
2389 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2390
2391 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2392 {
2393 Assert(uData == 0);
2394 pReNative->aidxUniqueLabels[enmType] = cLabels;
2395 }
2396
2397 if (offWhere != UINT32_MAX)
2398 {
2399#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2400 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2401 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2402#endif
2403 }
2404 return cLabels;
2405}
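
/*
 * Typical usage sketch for the label machinery (illustrative only; the label
 * and fixup type values are placeholders, not actual enum members):
 *
 *      // Forward-declare a jump target whose position is not yet known.
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType);
 *      // Emit the branch at 'off' and record where it needs patching:
 *      iemNativeAddFixup(pReNative, off, idxLabel, enmSomeFixupType);
 *      // Later, when the target position is known:
 *      iemNativeLabelDefine(pReNative, idxLabel, offTarget);
 *
 * The recorded fixups are resolved against the label offsets when the
 * recompiled code is finalized.
 */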
2406
2407
2408/**
2409 * Defines the location of an existing label.
2410 *
2411 * @param pReNative The native recompile state.
2412 * @param idxLabel The label to define.
2413 * @param offWhere The position.
2414 */
2415DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2416{
2417 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2418 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2419 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2420 pLabel->off = offWhere;
2421#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2422 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2423 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2424#endif
2425}
2426
2427
2428/**
2429 * Looks up a label.
2430 *
2431 * @returns Label ID if found, UINT32_MAX if not.
2432 */
2433DECLHIDDEN(uint32_t) iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2434 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
2435{
2436 Assert((unsigned)enmType < 64);
2437 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2438 {
2439 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2440 return pReNative->aidxUniqueLabels[enmType];
2441
2442 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2443 uint32_t const cLabels = pReNative->cLabels;
2444 for (uint32_t i = 0; i < cLabels; i++)
2445 if ( paLabels[i].enmType == enmType
2446 && paLabels[i].uData == uData
2447 && ( paLabels[i].off == offWhere
2448 || offWhere == UINT32_MAX
2449 || paLabels[i].off == UINT32_MAX))
2450 return i;
2451 }
2452 return UINT32_MAX;
2453}
2454
2455
2456/**
2457 * Adds a fixup.
2458 *
2459 * @throws VBox status code (int) on failure.
2460 * @param pReNative The native recompile state.
2461 * @param offWhere The instruction offset of the fixup location.
2462 * @param idxLabel The target label ID for the fixup.
2463 * @param enmType The fixup type.
2464 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2465 */
2466DECL_HIDDEN_THROW(void)
2467iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2468 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2469{
2470 Assert(idxLabel <= UINT16_MAX);
2471 Assert((unsigned)enmType <= UINT8_MAX);
2472#ifdef RT_ARCH_ARM64
2473 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2474 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2475 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2476#endif
2477
2478 /*
2479 * Make sure we've room.
2480 */
2481 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2482 uint32_t const cFixups = pReNative->cFixups;
2483 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2484 { /* likely */ }
2485 else
2486 {
2487 uint32_t cNew = pReNative->cFixupsAlloc;
2488 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2489 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2490 cNew *= 2;
2491 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2492 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2493 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2494 pReNative->paFixups = paFixups;
2495 pReNative->cFixupsAlloc = cNew;
2496 }
2497
2498 /*
2499 * Add the fixup.
2500 */
2501 paFixups[cFixups].off = offWhere;
2502 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2503 paFixups[cFixups].enmType = enmType;
2504 paFixups[cFixups].offAddend = offAddend;
2505 pReNative->cFixups = cFixups + 1;
2506}
2507
2508
2509/**
2510 * Adds a fixup to the per-chunk tail code.
2511 *
2512 * @throws VBox status code (int) on failure.
2513 * @param pReNative The native recompile state.
2514 * @param offWhere The instruction offset of the fixup location.
2515 * @param enmExitReason The exit reason to jump to.
2516 */
2517DECL_HIDDEN_THROW(void)
2518iemNativeAddTbExitFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, IEMNATIVELABELTYPE enmExitReason)
2519{
2520 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
2521
2522 /*
2523 * Make sure we've room.
2524 */
2525 PIEMNATIVEEXITFIXUP paTbExitFixups = pReNative->paTbExitFixups;
2526 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
2527 if (RT_LIKELY(cTbExitFixups < pReNative->cTbExitFixupsAlloc))
2528 { /* likely */ }
2529 else
2530 {
2531 uint32_t cNew = pReNative->cTbExitFixupsAlloc;
2532 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2533 AssertStmt(cTbExitFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2534 cNew *= 2;
2535 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2536 paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemRealloc(paTbExitFixups, cNew * sizeof(paTbExitFixups[0]));
2537 AssertStmt(paTbExitFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2538 pReNative->paTbExitFixups = paTbExitFixups;
2539 pReNative->cTbExitFixupsAlloc = cNew;
2540 }
2541
2542 /*
2543 * Add the fixup.
2544 */
2545 paTbExitFixups[cTbExitFixups].off = offWhere;
2546 paTbExitFixups[cTbExitFixups].enmExitReason = enmExitReason;
2547 pReNative->cTbExitFixups = cTbExitFixups + 1;
2548}
2549
2550
2551/**
2552 * Slow code path for iemNativeInstrBufEnsure.
2553 */
2554DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2555{
2556 /* Double the buffer size till we meet the request. */
2557 uint32_t cNew = pReNative->cInstrBufAlloc;
2558 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2559 do
2560 cNew *= 2;
2561 while (cNew < off + cInstrReq);
2562
2563 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2564#ifdef RT_ARCH_ARM64
2565 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2566#else
2567 uint32_t const cbMaxInstrBuf = _2M;
2568#endif
2569 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2570
2571 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2572 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2573
2574#ifdef VBOX_STRICT
2575 pReNative->offInstrBufChecked = off + cInstrReq;
2576#endif
2577 pReNative->cInstrBufAlloc = cNew;
2578 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2579}
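
/*
 * Sizing note (illustrative): the instruction buffer starts out as a _64K
 * byte allocation (see iemNativeInit above), i.e. cInstrBufAlloc entries of
 * sizeof(IEMNATIVEINSTR) each.  If, say, cInstrBufAlloc is 16384 and a
 * request needs off + cInstrReq = 40000 entries, the size is doubled twice
 * to 65536 entries.  The byte size is capped at _1M on ARM64 (limited by the
 * branch instruction range) and _2M elsewhere.
 */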
2580
2581#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2582
2583/**
2584 * Grows the static debug info array used during recompilation.
2585 *
2586 * @returns Pointer to the new debug info block; throws VBox status code on
2587 * failure, so no need to check the return value.
2588 */
2589DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2590{
2591 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2592 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2593 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2594 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2595 pReNative->pDbgInfo = pDbgInfo;
2596 pReNative->cDbgInfoAlloc = cNew;
2597 return pDbgInfo;
2598}
2599
2600
2601/**
2602 * Adds a new, uninitialized debug info entry, returning the pointer to it.
2603 */
2604DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2605{
2606 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2607 { /* likely */ }
2608 else
2609 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2610 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2611}
2612
2613
2614/**
2615 * Debug Info: Adds a native offset record, if necessary.
2616 */
2617DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2618{
2619 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2620
2621 /*
2622 * Do we need this one?
2623 */
2624 uint32_t const offPrev = pDbgInfo->offNativeLast;
2625 if (offPrev == off)
2626 return;
2627 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2628
2629 /*
2630 * Add it.
2631 */
2632 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2633 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2634 pEntry->NativeOffset.offNative = off;
2635 pDbgInfo->offNativeLast = off;
2636}
2637
2638
2639/**
2640 * Debug Info: Record info about a label.
2641 */
2642static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2643{
2644 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2645 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2646 pEntry->Label.uUnused = 0;
2647 pEntry->Label.enmLabel = (uint8_t)enmType;
2648 pEntry->Label.uData = uData;
2649}
2650
2651
2652/**
2653 * Debug Info: Record info about a threaded call.
2654 */
2655static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2656{
2657 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2658 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2659 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2660 pEntry->ThreadedCall.uUnused = 0;
2661 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2662}
2663
2664
2665/**
2666 * Debug Info: Record info about a new guest instruction.
2667 */
2668static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2669{
2670 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2671 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2672 pEntry->GuestInstruction.uUnused = 0;
2673 pEntry->GuestInstruction.fExec = fExec;
2674}
2675
2676
2677/**
2678 * Debug Info: Record info about guest register shadowing.
2679 */
2680DECL_HIDDEN_THROW(void)
2681iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2682 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2683{
2684 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2685 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2686 pEntry->GuestRegShadowing.uUnused = 0;
2687 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2688 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2689 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2690#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2691 Assert( idxHstReg != UINT8_MAX
2692 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2693#endif
2694}
2695
2696
2697# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2698/**
2699 * Debug Info: Record info about guest SIMD register shadowing.
2700 */
2701DECL_HIDDEN_THROW(void)
2702iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2703 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2704{
2705 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2706 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2707 pEntry->GuestSimdRegShadowing.uUnused = 0;
2708 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2709 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2710 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2711}
2712# endif
2713
2714
2715# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2716/**
2717 * Debug Info: Record info about delayed RIP updates.
2718 */
2719DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint64_t offPc, uint32_t cInstrSkipped)
2720{
2721 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2722 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2723 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2724 pEntry->DelayedPcUpdate.offPc = offPc; /** @todo support larger values */
2725}
2726# endif
2727
2728# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2729
2730/**
2731 * Debug Info: Record info about a dirty guest register.
2732 */
2733DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2734 uint8_t idxGstReg, uint8_t idxHstReg)
2735{
2736 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2737 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2738 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2739 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2740 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2741}
2742
2743
2744/**
2745 * Debug Info: Record info about a dirty guest register writeback operation.
2746 */
2747DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2748{
2749 unsigned const cBitsGstRegMask = 25;
2750 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2751
2752 /* The first block of 25 bits: */
2753 if (fGstReg & fGstRegMask)
2754 {
2755 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2756 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2757 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2758 pEntry->GuestRegWriteback.cShift = 0;
2759 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2760 fGstReg &= ~(uint64_t)fGstRegMask;
2761 if (!fGstReg)
2762 return;
2763 }
2764
2765 /* The second block of 25 bits: */
2766 fGstReg >>= cBitsGstRegMask;
2767 if (fGstReg & fGstRegMask)
2768 {
2769 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2770 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2771 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2772 pEntry->GuestRegWriteback.cShift = 1;
2773 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2774 fGstReg &= ~(uint64_t)fGstRegMask;
2775 if (!fGstReg)
2776 return;
2777 }
2778
2779 /* The last block with 14 bits: */
2780 fGstReg >>= cBitsGstRegMask;
2781 Assert(fGstReg & fGstRegMask);
2782 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2783 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2784 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2785 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2786 pEntry->GuestRegWriteback.cShift = 2;
2787 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2788}
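
/*
 * Encoding example: the 64-bit fGstReg mask is recorded as up to three
 * entries covering 25 + 25 + 14 bits, with cShift (0, 1 or 2) giving the
 * number of 25-bit chunks shifted out.  E.g. for
 * fGstReg = RT_BIT_64(3) | RT_BIT_64(30) the first entry has cShift=0 and
 * fGstReg=RT_BIT_32(3), the second has cShift=1 and fGstReg=RT_BIT_32(5)
 * (bit 30 minus the 25-bit shift), and no third entry is emitted.
 */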
2789
2790# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2791
2792# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2793/**
2794 * Debug Info: Record info about emitting a postponed EFLAGS calculation.
2795 */
2796DECL_HIDDEN_THROW(void)
2797iemNativeDbgInfoAddPostponedEFlagsCalc(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVE_POSTPONED_EFL_OP_T enmOp,
2798 uint8_t cOpBits, uint8_t idxEmit)
2799{
2800 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2801 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2802 pEntry->PostponedEflCalc.uType = kIemTbDbgEntryType_PostponedEFlagsCalc;
2803 pEntry->PostponedEflCalc.enmOp = (unsigned)enmOp;
2804 pEntry->PostponedEflCalc.cOpBits = cOpBits;
2805 pEntry->PostponedEflCalc.idxEmit = idxEmit;
2806 pEntry->PostponedEflCalc.uUnused = 0;
2807}
2808# endif /* IEMNATIVE_WITH_EFLAGS_POSTPONING */
2809
2810#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2811
2812
2813/*********************************************************************************************************************************
2814* Register Allocator *
2815*********************************************************************************************************************************/
2816
2817/**
2818 * Register parameter indexes (indexed by argument number).
2819 */
2820DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2821{
2822 IEMNATIVE_CALL_ARG0_GREG,
2823 IEMNATIVE_CALL_ARG1_GREG,
2824 IEMNATIVE_CALL_ARG2_GREG,
2825 IEMNATIVE_CALL_ARG3_GREG,
2826#if defined(IEMNATIVE_CALL_ARG4_GREG)
2827 IEMNATIVE_CALL_ARG4_GREG,
2828# if defined(IEMNATIVE_CALL_ARG5_GREG)
2829 IEMNATIVE_CALL_ARG5_GREG,
2830# if defined(IEMNATIVE_CALL_ARG6_GREG)
2831 IEMNATIVE_CALL_ARG6_GREG,
2832# if defined(IEMNATIVE_CALL_ARG7_GREG)
2833 IEMNATIVE_CALL_ARG7_GREG,
2834# endif
2835# endif
2836# endif
2837#endif
2838};
2839AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2840
2841/**
2842 * Call register masks indexed by argument count.
2843 */
2844DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2845{
2846 0,
2847 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2848 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2849 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2850 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2851 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2852#if defined(IEMNATIVE_CALL_ARG4_GREG)
2853 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2854 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2855# if defined(IEMNATIVE_CALL_ARG5_GREG)
2856 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2857 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2858# if defined(IEMNATIVE_CALL_ARG6_GREG)
2859 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2860 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2861 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2862# if defined(IEMNATIVE_CALL_ARG7_GREG)
2863 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2864 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2865 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2866# endif
2867# endif
2868# endif
2869#endif
2870};
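
/*
 * Example: for a helper call taking three register arguments,
 * g_aidxIemNativeCallRegs[0..2] give the host registers to load them into
 * and g_afIemNativeCallRegs[3] is the corresponding register mask, i.e.
 * RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
 * | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG), typically used when freeing up or
 * spilling registers around the call.
 */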
2871
2872#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2873/**
2874 * BP offset of the stack argument slots.
2875 *
2876 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2877 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2878 */
2879DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2880{
2881 IEMNATIVE_FP_OFF_STACK_ARG0,
2882# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2883 IEMNATIVE_FP_OFF_STACK_ARG1,
2884# endif
2885# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2886 IEMNATIVE_FP_OFF_STACK_ARG2,
2887# endif
2888# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2889 IEMNATIVE_FP_OFF_STACK_ARG3,
2890# endif
2891};
2892AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2893#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2894
2895/**
2896 * Info about shadowed guest register values.
2897 * @see IEMNATIVEGSTREG
2898 */
2899DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2900{
2901#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2902 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2903 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2904 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2905 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2906 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2907 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2908 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2909 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2910 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2911 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2912 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2913 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2914 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2915 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2916 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2917 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2918 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2919 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2920 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2921 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2922 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2923 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2924 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2925 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2926 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2927 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2928 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2929 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2930 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2931 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2932 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2933 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2934 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2935 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2936 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2937 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2938 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2939 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2940 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2941 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2942 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2943 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2944 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2945 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2946 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2947 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2948 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2949 /* [kIemNativeGstReg_EFlags.Cf] = */ { UINT32_MAX, 0, "efl.cf", },
2950 /* [kIemNativeGstReg_EFlags.Of] = */ { UINT32_MAX, 0, "efl.of", },
2951 /* [kIemNativeGstReg_EFlags.Af] = */ { UINT32_MAX, 0, "efl.af", },
2952 /* [kIemNativeGstReg_EFlags.Zf] = */ { UINT32_MAX, 0, "efl.zf", },
2953 /* [kIemNativeGstReg_EFlags.Sf] = */ { UINT32_MAX, 0, "efl.sf", },
2954 /* [kIemNativeGstReg_EFlags.Pf] = */ { UINT32_MAX, 0, "efl.pf", },
2955 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2956#undef CPUMCTX_OFF_AND_SIZE
2957};
2958AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
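
/*
 * Lookup example: g_aGstShadowInfo[kIemNativeGstReg_Pc].off is the offset of
 * cpum.GstCtx.rip within VMCPU and .cb its size (8 bytes); that is what the
 * store-width selection in iemNativeEmitStoreGprWithGstShadowReg() below
 * relies on when flushing a shadowed guest register.  The EFlags sub-parts
 * (efl.cf, efl.of, ...) have no backing field of their own, hence the
 * UINT32_MAX offsets and zero sizes.
 */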
2959
2960
2961/** Host CPU general purpose register names. */
2962DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2963{
2964#ifdef RT_ARCH_AMD64
2965 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2966#elif RT_ARCH_ARM64
2967 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2968 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2969#else
2970# error "port me"
2971#endif
2972};
2973
2974
2975#if 0 /* unused */
2976/**
2977 * Tries to locate a suitable register in the given register mask.
2978 *
2979 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2980 * failed.
2981 *
2982 * @returns Host register number on success, returns UINT8_MAX on failure.
2983 */
2984static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2985{
2986 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2987 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2988 if (fRegs)
2989 {
2990 /** @todo pick better here: */
2991 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2992
2993 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2994 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2995 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2996 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2997
2998 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2999 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3000 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3001 return idxReg;
3002 }
3003 return UINT8_MAX;
3004}
3005#endif /* unused */
3006
3007#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3008
3009/**
3010 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
3011 *
3012 * @returns New code buffer offset; throws VBox status code on failure.
3013 * @param pReNative The native recompile state.
3014 * @param off The current code buffer position.
3015 * @param enmGstReg The guest register to store to.
3016 * @param idxHstReg The host register to store from.
3017 */
3018DECL_FORCE_INLINE_THROW(uint32_t)
3019iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
3020{
3021 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3022 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3023
3024 switch (g_aGstShadowInfo[enmGstReg].cb)
3025 {
3026 case sizeof(uint64_t):
3027 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3028 case sizeof(uint32_t):
3029 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3030 case sizeof(uint16_t):
3031 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3032# if 0 /* not present in the table. */
3033 case sizeof(uint8_t):
3034 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3035# endif
3036 default:
3037 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3038 }
3039}
3040
3041
3042/**
3043 * Emits code to flush a pending write of the given guest register,
3044 * version with alternative core state.
3045 *
3046 * @returns New code buffer offset.
3047 * @param pReNative The native recompile state.
3048 * @param off Current code buffer position.
3049 * @param pCore Alternative core state.
3050 * @param enmGstReg The guest register to flush.
3051 */
3052DECL_HIDDEN_THROW(uint32_t)
3053iemNativeRegFlushPendingWriteEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVECORESTATE pCore, IEMNATIVEGSTREG enmGstReg)
3054{
3055 uint8_t const idxHstReg = pCore->aidxGstRegShadows[enmGstReg];
3056
3057 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3058 && enmGstReg <= kIemNativeGstReg_GprLast)
3059 || enmGstReg == kIemNativeGstReg_MxCsr);
3060 Assert( idxHstReg != UINT8_MAX
3061 && pCore->bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3062 Log12(("iemNativeRegFlushPendingWriteEx: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3063 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3064
3065 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3066
3067 pCore->bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3068 return off;
3069}
3070
3071
3072/**
3073 * Emits code to flush a pending write of the given guest register.
3074 *
3075 * @returns New code buffer offset.
3076 * @param pReNative The native recompile state.
3077 * @param off Current code buffer position.
3078 * @param enmGstReg The guest register to flush.
3079 */
3080DECL_HIDDEN_THROW(uint32_t)
3081iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
3082{
3083 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3084
3085 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3086 && enmGstReg <= kIemNativeGstReg_GprLast)
3087 || enmGstReg == kIemNativeGstReg_MxCsr);
3088 Assert( idxHstReg != UINT8_MAX
3089 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3090 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3091 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3092
3093 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3094
3095 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3096 return off;
3097}
3098
3099
3100/**
3101 * Flush the given set of guest registers if marked as dirty.
3102 *
3103 * @returns New code buffer offset.
3104 * @param pReNative The native recompile state.
3105 * @param off Current code buffer position.
3106 * @param fFlushGstReg The guest register set to flush (default is flush everything).
3107 * @note Must not modify the host status flags!
3108 */
3109DECL_HIDDEN_THROW(uint32_t)
3110iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
3111{
3112 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
3113 if (bmGstRegShadowDirty)
3114 {
3115# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3116 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3117 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
3118# endif
3119 do
3120 {
3121 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3122 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3123 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3124 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3125 } while (bmGstRegShadowDirty);
3126 }
3127
3128 return off;
3129}
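/*
 * Illustration only: a minimal, hypothetical sketch of how a caller might use
 * iemNativeRegFlushDirtyGuest() - either flushing every dirty guest shadow back to
 * CPUMCTX or restricting the writeback to a subset.  The example function name is
 * made up; the real callers are the TB epilogue and helper-call emitters elsewhere.
 */
#if 0
static uint32_t iemNativeExampleFlushDirtyShadows(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Flush everything that is marked dirty in bmGstRegShadowDirty... */
    off = iemNativeRegFlushDirtyGuest(pReNative, off, UINT64_MAX);
    /* ...or just a single guest GPR, e.g. rax; registers not marked dirty are skipped. */
    off = iemNativeRegFlushDirtyGuest(pReNative, off, RT_BIT_64(kIemNativeGstReg_GprFirst + 0 /*rax*/));
    return off;
}
#endif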
3130
3131
3132/**
3133 * Flush all shadowed guest registers marked as dirty for the given host register.
3134 *
3135 * @returns New code buffer offset.
3136 * @param pReNative The native recompile state.
3137 * @param off Current code buffer position.
3138 * @param idxHstReg The host register.
3139 *
3140 * @note This doesn't do any unshadowing of guest registers from the host register.
3141 *
3142 * @note Must not modify the host status flags!
3143 */
3144DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3145{
3146 /* We need to flush any pending guest register writes this host register shadows. */
3147 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3148 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3149 {
3150# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3151 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3152 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3153# endif
3154 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3155 do
3156 {
3157 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3158 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3159 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3160 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3161 } while (bmGstRegShadowDirty);
3162 }
3163
3164 return off;
3165}
3166
3167#endif /* IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK */
3168
3169
3170/**
3171 * Locate a register, possibly freeing one up.
3172 *
3173 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3174 * failed.
3175 *
3176 * @returns Host register number on success. Returns UINT8_MAX if no register
3177 * was found; the caller is supposed to deal with this and raise an
3178 * allocation type specific status code (if desired).
3179 *
3180 * @throws VBox status code if we run into trouble spilling a variable or
3181 * recording debug info. Does NOT throw anything if we're out of
3182 * registers, though.
3183 *
3184 * @note Must not modify the host status flags!
3185 */
3186static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3187 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3188{
3189 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3190 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3191 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3192
3193 /*
3194 * Try a freed register that's shadowing a guest register.
3195 */
3196 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3197 if (fRegs)
3198 {
3199 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3200
3201#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3202 /*
3203 * When we have liveness information, we use it to kick out all shadowed
3204 * guest registers that will not be needed any more in this TB. If we're
3205 * lucky, this may prevent us from ending up here again.
3206 *
3207 * Note! We must consider the previous entry here so we don't free
3208 * anything that the current threaded function requires (current
3209 * entry is produced by the next threaded function).
3210 */
3211 uint32_t const idxCurCall = pReNative->idxCurCall;
3212 if (idxCurCall > 0)
3213 {
3214 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3215 uint64_t fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
3216
3217 /* Merge EFLAGS. */
3218 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3219 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3220 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3221 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3222 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
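            /* Illustrative note: the three AND-folds above collapse the seven consecutive
               EFLAGS liveness bits starting at kIemNativeGstReg_EFlags into that single bit,
               so the combined EFLAGS shadow is only considered freeable when every one of
               the sub-flags can be freed; a single still-live sub-flag keeps it shadowed. */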
3223
3224 /* If it matches any shadowed registers. */
3225 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3226 {
3227#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3228 /* Writeback any dirty shadow registers we are about to unshadow. */
3229 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3230#endif
3231
3232 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3233 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3234 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3235
3236 /* See if we've got any unshadowed registers we can return now. */
3237 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3238 if (fUnshadowedRegs)
3239 {
3240 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3241 return (fPreferVolatile
3242 ? ASMBitFirstSetU32(fUnshadowedRegs)
3243 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3244 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3245 - 1;
3246 }
3247 }
3248 }
3249#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3250
3251 unsigned const idxReg = (fPreferVolatile
3252 ? ASMBitFirstSetU32(fRegs)
3253 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3254 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3255 - 1;
3256
3257 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3258 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3259 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3260 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3261
3262#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3263 /* We need to flush any pending guest register writes this host register shadows. */
3264 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3265#endif
3266
3267 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3268 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3269 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3270 return idxReg;
3271 }
3272
3273 /*
3274 * Try free up a variable that's in a register.
3275 *
3276 * We do two rounds here, first evacuating variables we don't need to be
3277 * saved on the stack, then in the second round move things to the stack.
3278 */
3279 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3280 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3281 {
3282 uint32_t fVars = pReNative->Core.bmVars;
3283 while (fVars)
3284 {
3285 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3286 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3287#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3288 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
3289 { fVars &= ~RT_BIT_32(idxVar); continue; } /* must clear the bit before continuing or we'd loop on this variable forever */
3290#endif
3291
3292 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3293 && (RT_BIT_32(idxReg) & fRegMask)
3294 && ( iLoop == 0
3295 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3296 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3297 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3298 {
3299 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3300 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3301 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3302 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3303 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3304 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3305#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3306 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3307#endif
3308
3309 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3310 {
3311 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3312 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3313 }
3314
3315 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3316 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3317
3318 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3319 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3320 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3321 return idxReg;
3322 }
3323 fVars &= ~RT_BIT_32(idxVar);
3324 }
3325 }
3326
3327 return UINT8_MAX;
3328}
3329
3330
3331/**
3332 * Reassigns a variable to a different register specified by the caller.
3333 *
3334 * @returns The new code buffer position.
3335 * @param pReNative The native recompile state.
3336 * @param off The current code buffer position.
3337 * @param idxVar The variable index.
3338 * @param idxRegOld The old host register number.
3339 * @param idxRegNew The new host register number.
3340 * @param pszCaller The caller for logging.
3341 */
3342static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3343 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3344{
3345 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3346 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3347#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3348 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3349#endif
3350 RT_NOREF(pszCaller);
3351
3352#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3353 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3354#endif
3355 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3356
3357 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3358#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3359 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3360#endif
3361 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3362 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3363 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3364
3365 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3366 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3367 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3368 if (fGstRegShadows)
3369 {
3370 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3371 | RT_BIT_32(idxRegNew);
3372 while (fGstRegShadows)
3373 {
3374 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3375 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3376
3377 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3378 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3379 }
3380 }
3381
3382 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3383 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3384 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3385 return off;
3386}
3387
3388
3389/**
3390 * Moves a variable to a different register or spills it onto the stack.
3391 *
3392 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3393 * kinds can easily be recreated if needed later.
3394 *
3395 * @returns The new code buffer position.
3396 * @param pReNative The native recompile state.
3397 * @param off The current code buffer position.
3398 * @param idxVar The variable index.
3399 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3400 * call-volatile registers.
3401 */
3402DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3403 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3404{
3405 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3406 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3407 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3408 Assert(!pVar->fRegAcquired);
3409
3410 uint8_t const idxRegOld = pVar->idxReg;
3411 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3412 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3413 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3414 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3415 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3416 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3417 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3418 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3419#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3420 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3421#endif
3422
3423
3424 /** @todo Add statistics on this.*/
3425 /** @todo Implement basic variable liveness analysis (python) so variables
3426 * can be freed immediately once they are no longer used. Without that we risk
3427 * trashing registers and stack slots on dead variables.
3428 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3429
3430 /*
3431 * First try move it to a different register, as that's cheaper.
3432 */
3433 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3434 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3435 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3436 if (fRegs)
3437 {
3438 /* Avoid using shadow registers, if possible. */
3439 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3440 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3441 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3442 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3443 }
3444
3445 /*
3446 * Otherwise we must spill the register onto the stack.
3447 */
3448 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3449 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3450 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3451 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3452
3453 pVar->idxReg = UINT8_MAX;
3454 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3455 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3456 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3457 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3458 return off;
3459}
3460
3461
3462/**
3463 * Allocates a temporary host general purpose register.
3464 *
3465 * This may emit code to save register content onto the stack in order to free
3466 * up a register.
3467 *
3468 * @returns The host register number; throws VBox status code on failure,
3469 * so no need to check the return value.
3470 * @param pReNative The native recompile state.
3471 * @param poff Pointer to the variable with the code buffer
3472 * position. This will be updated if we need to move
3473 * a variable from register to stack in order to
3474 * satisfy the request.
3475 * @tparam a_fPreferVolatile Whether to prefer volatile over non-volatile
3476 * registers (@c true, default) or the other way
3477 * around (@c false, for
3478 * iemNativeRegAllocTmpForGuestReg()).
3479 *
3480 * @note Must not modify the host status flags!
3481 */
3482template<bool const a_fPreferVolatile>
3483DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3484{
3485 /*
3486 * Try find a completely unused register, preferably a call-volatile one.
3487 */
3488 uint8_t idxReg;
3489 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3490 & ~pReNative->Core.bmHstRegsWithGstShadow
3491 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3492 if (fRegs)
3493 {
3494 if (a_fPreferVolatile)
3495 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3496 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3497 else
3498 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3499 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3500 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3501 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3502 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3503 }
3504 else
3505 {
3506 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile);
3507 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3508 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3509 }
3510 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3511}
3512
3513
3514/** See iemNativeRegAllocTmpInt for details. */
3515DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3516{
3517 return iemNativeRegAllocTmpInt<true>(pReNative, poff);
3518}
3519
3520
3521/** See iemNativeRegAllocTmpInt for details. */
3522DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpPreferNonVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3523{
3524 return iemNativeRegAllocTmpInt<false>(pReNative, poff);
3525}
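/*
 * Illustration only: a hypothetical sketch of the usual temporary register life cycle -
 * allocate, let the instruction emitters use it as scratch, then free it so the allocator
 * can hand it out again.  The example function name and the 0x42 constant are made up.
 */
#if 0
static uint32_t iemNativeExampleTmpRegUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);  /* may emit spill code and update off */
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, 0x42); /* use it as scratch */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);                        /* hand it back */
    return off;
}
#endif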
3526
3527
3528/**
3529 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3530 * registers.
3531 *
3532 * @returns The host register number; throws VBox status code on failure,
3533 * so no need to check the return value.
3534 * @param pReNative The native recompile state.
3535 * @param poff Pointer to the variable with the code buffer
3536 * position. This will be updated if we need to move
3537 * a variable from register to stack in order to
3538 * satisfy the request.
3539 * @param fRegMask Mask of acceptable registers.
3540 * @tparam a_fPreferVolatile Whether to prefer volatile over non-volatile
3541 * registers (@c true, default) or the other way
3542 * around (@c false, for
3543 * iemNativeRegAllocTmpForGuestReg()).
3544 */
3545template<bool const a_fPreferVolatile>
3546DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpExInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3547{
3548 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3549 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3550
3551 /*
3552 * Try find a completely unused register, preferably a call-volatile one.
3553 */
3554 uint8_t idxReg;
3555 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3556 & ~pReNative->Core.bmHstRegsWithGstShadow
3557 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3558 & fRegMask;
3559 if (fRegs)
3560 {
3561 if RT_CONSTEXPR_IF(a_fPreferVolatile)
3562 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3563 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3564 else
3565 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3566 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3567 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3568 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3569 Log12(("iemNativeRegAllocTmpExInt: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3570 }
3571 else
3572 {
3573 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile, fRegMask);
3574 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3575 Log12(("iemNativeRegAllocTmpExInt: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3576 }
3577 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3578}
3579
3580
3581/** See iemNativeRegAllocTmpExInt for details. */
3582DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3583{
3584 return iemNativeRegAllocTmpExInt<true>(pReNative, poff, fRegMask);
3585}
3586
3587
3588/** See iemNativeRegAllocTmpExInt for details. */
3589DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpExPreferNonVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3590{
3591 return iemNativeRegAllocTmpExInt<false>(pReNative, poff, fRegMask);
3592}
3593
3594
3595/** Internal templated variation of iemNativeRegAllocTmpEx. */
3596template<uint32_t const a_fRegMask, bool const a_fPreferVolatile>
3597DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpExInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3598{
3599 AssertCompile(!(a_fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3600 AssertCompile(!(a_fRegMask & IEMNATIVE_REG_FIXED_MASK));
3601
3602 /*
3603 * Try find a completely unused register, preferably a call-volatile one.
3604 */
3605 uint8_t idxReg;
3606 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3607 & ~pReNative->Core.bmHstRegsWithGstShadow
3608 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3609 & a_fRegMask;
3610 if (fRegs)
3611 {
3612 if RT_CONSTEXPR_IF(a_fPreferVolatile)
3613 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3614 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3615 else
3616 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3617 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3618 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3619 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3620 Log12(("iemNativeRegAllocTmpExInt: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3621 }
3622 else
3623 {
3624 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile, a_fRegMask);
3625 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3626 Log12(("iemNativeRegAllocTmpExInt: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3627 }
3628 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3629}
3630
3631
3632/**
3633 * Allocates a temporary register for loading an immediate value into.
3634 *
3635 * This will emit code to load the immediate, unless there happens to be an
3636 * unused register with the value already loaded.
3637 *
3638 * The caller will not modify the returned register, it must be considered
3639 * read-only. Free using iemNativeRegFreeTmpImm.
3640 *
3641 * @returns The host register number; throws VBox status code on failure, so no
3642 * need to check the return value.
3643 * @param pReNative The native recompile state.
3644 * @param poff Pointer to the variable with the code buffer position.
3645 * @param uImm The immediate value that the register must hold upon
3646 * return.
3647 * @note Prefers volatile registers.
3648 * @note Reusing immediate values has not been implemented yet.
3649 */
3650DECL_HIDDEN_THROW(uint8_t)
3651iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm)
3652{
3653 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff);
3654 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3655 return idxReg;
3656}
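/*
 * Illustration only: a hypothetical caller that needs a register preloaded with a constant
 * for a few emitted instructions.  The register must be treated as read-only and released
 * with iemNativeRegFreeTmpImm(), which assumes the value was left untouched.
 */
#if 0
static uint32_t iemNativeExampleTmpImmUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xfff));
    /* ... emit instructions consuming idxImmReg as a read-only source operand here ... */
    iemNativeRegFreeTmpImm(pReNative, idxImmReg);
    return off;
}
#endif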
3657
3658
3659/**
3660 * Common worker for iemNativeRegAllocTmpForGuestReg() and
3661 * iemNativeRegAllocTmpForGuestEFlags().
3662 *
3663 * See iemNativeRegAllocTmpForGuestRegInt() for details.
3664 */
3665template<IEMNATIVEGSTREGUSE const a_enmIntendedUse, uint32_t const a_fRegMask>
3666static uint8_t iemNativeRegAllocTmpForGuestRegCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3667{
3668 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3669#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3670 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3671#endif
3672
3673 /*
3674 * First check if the guest register value is already in a host register.
3675 */
3676 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3677 {
3678 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3679 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3680 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3681 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3682
3683 /* It's not supposed to be allocated... */
3684 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3685 {
3686 /*
3687 * If the register will trash the guest shadow copy, try find a
3688 * completely unused register we can use instead. If that fails,
3689 * we need to disassociate the host reg from the guest reg.
3690 */
3691 /** @todo would be nice to know if preserving the register is in any way helpful. */
3692 /* If the purpose is calculations, try to duplicate the register value as
3693 we'll be clobbering the shadow. */
3694 if ( a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3695 && ( ~pReNative->Core.bmHstRegs
3696 & ~pReNative->Core.bmHstRegsWithGstShadow
3697 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3698 {
3699 uint8_t const idxRegNew = iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff);
3700
3701 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3702
3703 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3704 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3705 g_apszIemNativeHstRegNames[idxRegNew]));
3706 idxReg = idxRegNew;
3707 }
3708 /* If the current register matches the restrictions, go ahead and allocate
3709 it for the caller. */
3710 else if (a_fRegMask & RT_BIT_32(idxReg))
3711 {
3712 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3713 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3714 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3715 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3716 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n", g_apszIemNativeHstRegNames[idxReg],
3717 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3718 else
3719 {
3720 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3721 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3722 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3723 }
3724 }
3725 /* Otherwise, allocate a register that satisfies the caller and transfer
3726 the shadowing if compatible with the intended use. (This basically
3727 means the caller wants a non-volatile register (RSP push/pop scenario).) */
3728 else
3729 {
3730 Assert(!(a_fRegMask & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
3731 uint8_t const idxRegNew = (a_fRegMask & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3732 && a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3733 ? iemNativeRegAllocTmpEx(pReNative, poff, a_fRegMask & ~RT_BIT_32(idxReg))
3734 : iemNativeRegAllocTmpExPreferNonVolatile(pReNative, poff, a_fRegMask & ~RT_BIT_32(idxReg));
3735 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3736 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3737 {
3738 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3739 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3740 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3741 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3742 }
3743 else
3744 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3745 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3746 g_apszIemNativeHstRegNames[idxRegNew]));
3747 idxReg = idxRegNew;
3748 }
3749 }
3750 else
3751 {
3752 /*
3753 * Oops. Shadowed guest register already allocated!
3754 *
3755 * Allocate a new register, copy the value and, if updating, the
3756 * guest shadow copy assignment to the new register.
3757 */
3758 AssertMsg( a_enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3759 && a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3760 ("This shouldn't happen: idxReg=%d enmGstReg=%d a_enmIntendedUse=%s\n",
3761 idxReg, enmGstReg, s_pszIntendedUse[a_enmIntendedUse]));
3762
3763 /** @todo share register for readonly access. */
3764 uint8_t const idxRegNew = a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3765 ? iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff)
3766 : iemNativeRegAllocTmpExInt<a_fRegMask, false>(pReNative, poff);
3767
3768 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3769 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3770
3771 if RT_CONSTEXPR_IF( a_enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3772 && a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3773 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3774 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3775 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[a_enmIntendedUse]));
3776 else
3777 {
3778 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3779 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3780 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3781 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[a_enmIntendedUse]));
3782 }
3783 idxReg = idxRegNew;
3784 }
3785 Assert(RT_BIT_32(idxReg) & a_fRegMask); /* See assumption in fNoVolatileRegs docs. */
3786
3787#ifdef VBOX_STRICT
3788 /* Strict builds: Check that the value is correct. */
3789 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3790#endif
3791
3792#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3793 /** @todo r=aeichner Implement for registers other than GPR as well. */
3794 if RT_CONSTEXPR_IF( a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3795 || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3796 if ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3797 && enmGstReg <= kIemNativeGstReg_GprLast)
3798 || enmGstReg == kIemNativeGstReg_MxCsr)
3799 {
3800# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3801 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3802 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3803# endif
3804 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3805 }
3806#endif
3807
3808 return idxReg;
3809 }
3810
3811 /*
3812 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3813 */
3814 uint8_t const idxRegNew = a_enmIntendedUse != kIemNativeGstRegUse_Calculation
3815 ? iemNativeRegAllocTmpExInt<a_fRegMask, false>(pReNative, poff)
3816 : iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff);
3817
3818 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3819 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3820
3821 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3822 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3823 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3824 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3825
3826#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3827 /** @todo r=aeichner Implement for registers other than GPR as well. */
3828 if RT_CONSTEXPR_IF( a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3829 || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3830 if ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3831 && enmGstReg <= kIemNativeGstReg_GprLast)
3832 || enmGstReg == kIemNativeGstReg_MxCsr)
3833 {
3834# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3835 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3836 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3837# endif
3838 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3839 }
3840#endif
3841
3842 return idxRegNew;
3843}
3844
3845
3846/**
3847 * Allocates a temporary host general purpose register for keeping a guest
3848 * register value.
3849 *
3850 * Since we may already have a register holding the guest register value,
3851 * code will be emitted to do the loading if that's not the case. Code may also
3852 * be emitted if we have to free up a register to satisfy the request.
3853 *
3854 * @returns The host register number; throws VBox status code on failure, so no
3855 * need to check the return value.
3856 * @param pReNative The native recompile state.
3857 * @param poff Pointer to the variable with the code buffer
3858 * position. This will be updated if we need to move
3859 * a variable from register to stack in order to
3860 * satisfy the request.
3861 * @param enmGstReg The guest register that is to be updated.
3862 * @tparam a_enmIntendedUse How the caller will be using the host register.
3863 * @tparam a_fNonVolatileRegs Set if no volatile register allowed, clear if
3864 * any register is okay (default).
3865 * The ASSUMPTION here is that the caller has
3866 * already flushed all volatile registers,
3867 * so this is only applied if we allocate a new
3868 * register.
3869 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3870 */
3871template<IEMNATIVEGSTREGUSE const a_enmIntendedUse, bool const a_fNonVolatileRegs>
3872DECL_FORCE_INLINE_THROW(uint8_t)
3873iemNativeRegAllocTmpForGuestRegInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3874{
3875#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3876 AssertMsg( pReNative->idxCurCall == 0
3877 || enmGstReg == kIemNativeGstReg_Pc
3878 || (a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3879 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3880 : a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3881 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3882 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3883 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3884#endif
3885
3886 if RT_CONSTEXPR_IF(!a_fNonVolatileRegs)
3887 return iemNativeRegAllocTmpForGuestRegCommon<a_enmIntendedUse,
3888 IEMNATIVE_HST_GREG_MASK
3889 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, enmGstReg);
3890 else /* keep else, is required by MSC */
3891 return iemNativeRegAllocTmpForGuestRegCommon<a_enmIntendedUse,
3892 IEMNATIVE_HST_GREG_MASK
3893 & ~IEMNATIVE_REG_FIXED_MASK
3894 & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK>(pReNative, poff, enmGstReg);
3895}
3896
3897/* Variants including volatile registers: */
3898
3899DECL_HIDDEN_THROW(uint8_t)
3900iemNativeRegAllocTmpForGuestRegReadOnly(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3901{
3902 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ReadOnly, false>(pReNative, poff, enmGstReg);
3903}
3904
3905DECL_HIDDEN_THROW(uint8_t)
3906iemNativeRegAllocTmpForGuestRegUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3907{
3908 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForUpdate, false>(pReNative, poff, enmGstReg);
3909}
3910
3911DECL_HIDDEN_THROW(uint8_t)
3912iemNativeRegAllocTmpForGuestRegFullWrite(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3913{
3914 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForFullWrite, false>(pReNative, poff, enmGstReg);
3915}
3916
3917DECL_HIDDEN_THROW(uint8_t)
3918iemNativeRegAllocTmpForGuestRegCalculation(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3919{
3920 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_Calculation, false>(pReNative, poff, enmGstReg);
3921}
3922
3923/* Variants excluding any volatile registers: */
3924
3925DECL_HIDDEN_THROW(uint8_t)
3926iemNativeRegAllocTmpForGuestRegReadOnlyNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3927{
3928 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ReadOnly, true>(pReNative, poff, enmGstReg);
3929}
3930
3931DECL_HIDDEN_THROW(uint8_t)
3932iemNativeRegAllocTmpForGuestRegUpdateNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3933{
3934 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForUpdate, true>(pReNative, poff, enmGstReg);
3935}
3936
3937DECL_HIDDEN_THROW(uint8_t)
3938iemNativeRegAllocTmpForGuestRegFullWriteNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3939{
3940 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForFullWrite, true>(pReNative, poff, enmGstReg);
3941}
3942
3943DECL_HIDDEN_THROW(uint8_t)
3944iemNativeRegAllocTmpForGuestRegCalculationNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3945{
3946 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_Calculation, true>(pReNative, poff, enmGstReg);
3947}
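/*
 * Illustration only: a hypothetical sketch contrasting the read-only and for-update
 * flavours above.  The guest register indices (rax, rcx) and the emitted move are just
 * examples; with delayed writeback enabled the for-update allocation marks the shadow
 * dirty so the flush code stores it back to CPUMCTX later.
 */
#if 0
static uint32_t iemNativeExampleGuestRegUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Get (or reuse) a host register shadowing guest rcx, read-only. */
    uint8_t const idxSrcReg = iemNativeRegAllocTmpForGuestRegReadOnly(pReNative, &off,
                                                                      (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + 1 /*rcx*/));
    /* Get guest rax for updating. */
    uint8_t const idxDstReg = iemNativeRegAllocTmpForGuestRegUpdate(pReNative, &off,
                                                                    (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + 0 /*rax*/));
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxDstReg, idxSrcReg); /* the 'instruction' */
    iemNativeRegFreeTmp(pReNative, idxDstReg);
    iemNativeRegFreeTmp(pReNative, idxSrcReg);
    return off;
}
#endif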
3948
3949
3950
3951#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
3952/**
3953 * Specialized version of iemNativeRegAllocTmpForGuestReg for EFLAGS.
3954 *
3955 * This takes additional arguments for covering liveness assertions in strict
3956 * builds, it's otherwise the same as iemNativeRegAllocTmpForGuestReg() with
3957 * kIemNativeGstReg_EFlags as argument.
3958 */
3959template<IEMNATIVEGSTREGUSE const a_enmIntendedUse>
3960DECL_FORCE_INLINE_THROW(uint8_t)
3961iemNativeRegAllocTmpForGuestEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t fRead,
3962 uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3963{
3964 if (pReNative->idxCurCall != 0 && (fRead || fWrite /*|| fPotentialCall*/))
3965 {
3966 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3967 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3968 Assert(!(fPotentialCall & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3969 uint64_t const fAll = fRead | fWrite /*| fPotentialCall*/;
3970 uint32_t fState;
3971# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
3972 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
3973 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
3974 || ( fRead & RT_BIT_64(a_enmGstEfl) \
3975 ? fWrite & RT_BIT_64(a_enmGstEfl) \
3976 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
3977 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
3978 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
3979 ) \
3980 , ("%s - %u\n", #a_enmGstEfl, fState))
3981 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
3982 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
3983 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
3984 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
3985 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
3986 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
3987 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
3988# undef MY_ASSERT_ONE_EFL
3989 }
3990 RT_NOREF(fPotentialCall);
3991
3992 AssertCompile(a_enmIntendedUse == kIemNativeGstRegUse_ReadOnly || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate);
3993 if RT_CONSTEXPR_IF(a_enmIntendedUse == kIemNativeGstRegUse_ReadOnly)
3994 return iemNativeRegAllocTmpForGuestRegCommon<kIemNativeGstRegUse_ReadOnly,
3995 IEMNATIVE_HST_GREG_MASK
3996 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, kIemNativeGstReg_EFlags);
3997 else /* keep else, is required by MSC */
3998 return iemNativeRegAllocTmpForGuestRegCommon<kIemNativeGstRegUse_ForUpdate,
3999 IEMNATIVE_HST_GREG_MASK
4000 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, kIemNativeGstReg_EFlags);
4001}
4002
4003
4004DECL_HIDDEN_THROW(uint8_t)
4005iemNativeRegAllocTmpForGuestEFlagsReadOnly(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
4006 uint64_t fRead, uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
4007{
4008 return iemNativeRegAllocTmpForGuestEFlags<kIemNativeGstRegUse_ReadOnly>(pReNative, poff, fRead, fWrite, fPotentialCall);
4009}
4010
4011DECL_HIDDEN_THROW(uint8_t)
4012iemNativeRegAllocTmpForGuestEFlagsForUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t fRead,
4013 uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
4014{
4015 return iemNativeRegAllocTmpForGuestEFlags<kIemNativeGstRegUse_ForUpdate>(pReNative, poff, fRead, fWrite, fPotentialCall);
4016}
4017
4018#endif
4019
4020
4021
4022/**
4023 * Common worker for iemNativeRegAllocTmpForGuestRegIfAlreadyPresent and
4024 * iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent.
4025 *
4026 * See iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() for details.
4027 */
4028DECL_FORCE_INLINE(uint8_t)
4029iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4030{
4031 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4032
4033 /*
4034 * First check if the guest register value is already in a host register.
4035 */
4036 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4037 {
4038 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4039 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4040 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4041 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4042
4043 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4044 {
4045 /*
4046 * We only do readonly use here, so easy compared to the other
4047 * variant of this code.
4048 */
4049 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4050 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4051 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4052 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4053 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4054
4055#ifdef VBOX_STRICT
4056 /* Strict builds: Check that the value is correct. */
4057 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4058#else
4059 RT_NOREF(poff);
4060#endif
4061 return idxReg;
4062 }
4063 }
4064
4065 return UINT8_MAX;
4066}
4067
4068
4069/**
4070 * Allocates a temporary host general purpose register that already holds the
4071 * given guest register value.
4072 *
4073 * The use case for this function is places where the shadowing state cannot be
4074 * modified due to branching and such. This will fail if we don't have a
4075 * current shadow copy handy or if it's incompatible. The only code that will
4076 * be emitted here is value checking code in strict builds.
4077 *
4078 * The intended use can only be readonly!
4079 *
4080 * @returns The host register number, UINT8_MAX if not present.
4081 * @param pReNative The native recompile state.
4082 * @param poff Pointer to the instruction buffer offset.
4083 * Will be updated in strict builds if a register is
4084 * found.
4085 * @param enmGstReg The guest register that is to be updated.
4086 * @note In strict builds, this may throw instruction buffer growth failures.
4087 * Non-strict builds will not throw anything.
4088 * @sa iemNativeRegAllocTmpForGuestReg
4089 */
4090DECL_HIDDEN_THROW(uint8_t)
4091iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4092{
4093#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4094 AssertMsg( pReNative->idxCurCall == 0
4095 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4096 || enmGstReg == kIemNativeGstReg_Pc
4097 , ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4098#endif
4099 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, enmGstReg);
4100}
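/*
 * Illustration only: a hypothetical caller pattern for the "if already present" variant.
 * Unlike the allocators above it never emits a load, so the UINT8_MAX case must be handled;
 * the fallback shown here simply loads the value into a register the caller already owns.
 */
#if 0
static uint32_t iemNativeExampleIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxOtherReg)
{
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* Cheap path: guest RIP is already shadowed by a host register. */
        off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxOtherReg, idxPcReg);
        iemNativeRegFreeTmp(pReNative, idxPcReg);
    }
    else
        /* No shadow copy handy: load it from CPUMCTX without touching the shadowing state. */
        off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxOtherReg, kIemNativeGstReg_Pc);
    return off;
}
#endif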
4101
4102
4103#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
4104/**
4105 * Specialized version of iemNativeRegAllocTmpForGuestRegIfAlreadyPresent for
4106 * EFLAGS.
4107 *
4108 * This takes additional arguments for covering liveness assertions in strict
4109 * builds, it's otherwise the same as
4110 * iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() with
4111 * kIemNativeGstReg_EFlags as argument.
4112 *
4113 * @note The @a fWrite parameter is necessary to complete the liveness picture,
4114 * as iemNativeEmitFetchEFlags() may fetch flags in prep for a later
4115 * commit. If the operation clobbers all the flags, @a fRead will be
4116 * zero, so better verify the whole picture while we're here.
4117 */
4118DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
4119 uint64_t fRead, uint64_t fWrite /*=0*/)
4120{
4121 if (pReNative->idxCurCall != 0)
4122 {
4123 Assert(fRead | fWrite);
4124 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
4125 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
4126 uint64_t const fAll = fRead | fWrite;
4127 uint32_t fState;
4128# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
4129 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
4130 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
4131 || ( fRead & RT_BIT_64(a_enmGstEfl) \
4132 ? fWrite & RT_BIT_64(a_enmGstEfl) \
4133 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
4134 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
4135 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
4136 ) \
4137 , ("%s - %u\n", #a_enmGstEfl, fState))
4138 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
4139 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
4140 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
4141 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
4142 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
4143 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
4144 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
4145# undef MY_ASSERT_ONE_EFL
4146 }
4147 RT_NOREF(fRead);
4148 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, kIemNativeGstReg_EFlags);
4149}
4150#endif
4151
4152
4153/**
4154 * Allocates argument registers for a function call.
4155 *
4156 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4157 * need to check the return value.
4158 * @param pReNative The native recompile state.
4159 * @param off The current code buffer offset.
4160 * @param cArgs The number of arguments the function call takes.
4161 */
4162DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4163{
4164 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4165 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4166 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4167 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4168
4169 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4170 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4171 else if (cArgs == 0)
4172 return off;
4173
4174 /*
4175 * Do we get lucky and all registers are free and not shadowing anything?
4176 */
4177 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4178 for (uint32_t i = 0; i < cArgs; i++)
4179 {
4180 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4181 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4182 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4183 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4184 }
4185 /*
4186 * Okay, not lucky so we have to free up the registers.
4187 */
4188 else
4189 for (uint32_t i = 0; i < cArgs; i++)
4190 {
4191 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4192 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4193 {
4194 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4195 {
4196 case kIemNativeWhat_Var:
4197 {
4198 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4199 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4200 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4201 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4202 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4203#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4204 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4205#endif
4206
4207 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4208 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4209 else
4210 {
4211 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4212 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4213 }
4214 break;
4215 }
4216
4217 case kIemNativeWhat_Tmp:
4218 case kIemNativeWhat_Arg:
4219 case kIemNativeWhat_rc:
4220 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4221 default:
4222 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4223 }
4224
4225 }
4226 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4227 {
4228 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4229 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4230 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4231#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4232 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4233#endif
4234 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4235 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4236 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4237 }
4238 else
4239 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4240 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4241 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4242 }
4243 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4244 return off;
4245}
4246
4247
4248DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4249
4250
4251#if 0
4252/**
4253 * Frees a register assignment of any type.
4254 *
4255 * @param pReNative The native recompile state.
4256 * @param idxHstReg The register to free.
4257 *
4258 * @note Does not update variables.
4259 */
4260DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4261{
4262 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4263 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4264 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4265 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4266 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4267 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4268 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4269 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4270 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4271 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4272 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4273 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4274 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4275 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4276
4277 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4278 /* no flushing, right:
4279 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4280 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4281 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4282 */
4283}
4284#endif
4285
4286
4287/**
4288 * Frees a temporary register.
4289 *
4290 * Any shadow copies of guest registers assigned to the host register will not
4291 * be flushed by this operation.
4292 */
4293DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4294{
4295 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4296 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4297 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4298 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4299 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4300}
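/* Illustrative pairing (sketch, not an actual call site): a temporary GPR obtained via
   iemNativeRegAllocTmp() earlier in this file is released like this once the emitted
   code no longer needs it:
       uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
       ... emit instructions using host register idxTmpReg ...
       iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */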
4301
4302
4303/**
4304 * Frees a temporary immediate register.
4305 *
4306 * It is assumed that the call has not modified the register, so it still holds
4307 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4308 */
4309DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4310{
4311 iemNativeRegFreeTmp(pReNative, idxHstReg);
4312}
4313
4314
4315/**
4316 * Frees a register assigned to a variable.
4317 *
4318 * The register will be disassociated from the variable.
4319 */
4320DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4321{
4322 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4323 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4324 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4325 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4326 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4327#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4328 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4329#endif
4330
4331 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4332 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4333 if (!fFlushShadows)
4334 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4335 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4336 else
4337 {
4338 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4339 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4340#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4341 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
4342#endif
4343 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4344 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4345 uint64_t fGstRegShadows = fGstRegShadowsOld;
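        /* Walk each guest register previously shadowed by this host register and drop
           its entry in the guest -> host (aidxGstRegShadows) reverse mapping. */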
4346 while (fGstRegShadows)
4347 {
4348 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4349 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4350
4351 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4352 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4353 }
4354 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4355 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4356 }
4357}
4358
4359
4360#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4361# if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
4362/** Host CPU SIMD register names. */
4363DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4364{
4365# ifdef RT_ARCH_AMD64
4366 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4367# elif defined(RT_ARCH_ARM64)
4368 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4369 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4370# else
4371# error "port me"
4372# endif
4373};
4374# endif
4375
4376
4377/**
4378 * Frees a SIMD register assigned to a variable.
4379 *
4380 * The register will be disassociated from the variable.
4381 */
4382DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4383{
4384 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4385 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4386 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4387 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4388 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4389 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4390
4391 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4392 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4393 if (!fFlushShadows)
4394 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4395 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4396 else
4397 {
4398 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4399 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4400 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4401 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4402 uint64_t fGstRegShadows = fGstRegShadowsOld;
4403 while (fGstRegShadows)
4404 {
4405 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4406 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4407
4408 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4409 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4410 }
4411 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4412 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4413 }
4414}
4415
4416
4417/**
4418 * Reassigns a variable to a different SIMD register specified by the caller.
4419 *
4420 * @returns The new code buffer position.
4421 * @param pReNative The native recompile state.
4422 * @param off The current code buffer position.
4423 * @param idxVar The variable index.
4424 * @param idxRegOld The old host register number.
4425 * @param idxRegNew The new host register number.
4426 * @param pszCaller The caller for logging.
4427 */
4428static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4429 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4430{
4431 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4432 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4433 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4434 RT_NOREF(pszCaller);
4435
4436 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4437 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4438 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4439
4440 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4441 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4442 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4443
4444 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4445 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4447
4448 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4449 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4450 else
4451 {
4452 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4453 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4454 }
4455
4456 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4457 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4458 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
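    /* Transfer the guest register shadowing from the old host SIMD register to the new
       one, updating the guest -> host reverse mapping (aidxGstSimdRegShadows) as we go. */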
4459 if (fGstRegShadows)
4460 {
4461 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4462 | RT_BIT_32(idxRegNew);
4463 while (fGstRegShadows)
4464 {
4465 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4466 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4467
4468 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4469 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4470 }
4471 }
4472
4473 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4474 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4475 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4476 return off;
4477}
4478
4479
4480/**
4481 * Moves a variable to a different register or spills it onto the stack.
4482 *
4483 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4484 * kinds can easily be recreated if needed later.
4485 *
4486 * @returns The new code buffer position.
4487 * @param pReNative The native recompile state.
4488 * @param off The current code buffer position.
4489 * @param idxVar The variable index.
4490 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4491 * call-volatile registers.
4492 */
4493DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4494 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4495{
4496 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4497 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4498 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4499 Assert(!pVar->fRegAcquired);
4500 Assert(!pVar->fSimdReg);
4501
4502 uint8_t const idxRegOld = pVar->idxReg;
4503 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4504 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4505 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4506 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4507 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4508    Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
4509 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4510 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4511 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4512 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4513
4514 /** @todo Add statistics on this.*/
4515 /** @todo Implement basic variable liveness analysis (python) so variables
4516 * can be freed immediately once no longer used. This has the potential to
4517 * be trashing registers and stack for dead variables.
4518 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4519
4520 /*
4521 * First try move it to a different register, as that's cheaper.
4522 */
4523 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4524 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4525 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4526 if (fRegs)
4527 {
4528 /* Avoid using shadow registers, if possible. */
4529 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4530 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4531 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4532 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4533 }
4534
4535 /*
4536 * Otherwise we must spill the register onto the stack.
4537 */
4538 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4539 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4540 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4541
4542 if (pVar->cbVar == sizeof(RTUINT128U))
4543 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4544 else
4545 {
4546 Assert(pVar->cbVar == sizeof(RTUINT256U));
4547 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4548 }
4549
4550 pVar->idxReg = UINT8_MAX;
4551 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4552 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4553 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4554 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4555 return off;
4556}
4557
4558
4559/**
4560 * Called right before emitting a call instruction to move anything important
4561 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
4562 * optionally freeing argument variables.
4563 *
4564 * @returns New code buffer offset, UINT32_MAX on failure.
4565 * @param pReNative The native recompile state.
4566 * @param off The code buffer offset.
4567 * @param cArgs The number of arguments the function call takes.
4568 * It is presumed that the host register part of these has
4569 * been allocated as such already and won't need moving,
4570 * just freeing.
4571 * @param fKeepVars Mask of variables that should keep their register
4572 * assignments. Caller must take care to handle these.
4573 */
4574DECL_HIDDEN_THROW(uint32_t)
4575iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4576{
4577 Assert(!cArgs); RT_NOREF(cArgs);
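    /* There is no argument support for SIMD registers yet (see the 'no argument support
       for now' note where the GPR variant below calls this), so cArgs must be zero here. */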
4578
4579 /* fKeepVars will reduce this mask. */
4580 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4581
4582 /*
4583 * Move anything important out of volatile registers.
4584 */
4585 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4586#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4587 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4588#endif
4589 ;
4590
4591 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4592 if (!fSimdRegsToMove)
4593 { /* likely */ }
4594 else
4595 {
4596 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4597 while (fSimdRegsToMove != 0)
4598 {
4599 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4600 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4601
4602 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4603 {
4604 case kIemNativeWhat_Var:
4605 {
4606                    uint8_t const       idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4607 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4608 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4609 Assert(pVar->idxReg == idxSimdReg);
4610 Assert(pVar->fSimdReg);
4611 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4612 {
4613 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4614 idxVar, pVar->enmKind, pVar->idxReg));
4615 if (pVar->enmKind != kIemNativeVarKind_Stack)
4616 pVar->idxReg = UINT8_MAX;
4617 else
4618 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4619 }
4620 else
4621 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4622 continue;
4623 }
4624
4625 case kIemNativeWhat_Arg:
4626 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4627 continue;
4628
4629 case kIemNativeWhat_rc:
4630 case kIemNativeWhat_Tmp:
4631 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4632 continue;
4633
4634 case kIemNativeWhat_FixedReserved:
4635#ifdef RT_ARCH_ARM64
4636 continue; /* On ARM the upper half of the virtual 256-bit register. */
4637#endif
4638
4639 case kIemNativeWhat_FixedTmp:
4640 case kIemNativeWhat_pVCpuFixed:
4641 case kIemNativeWhat_pCtxFixed:
4642 case kIemNativeWhat_PcShadow:
4643 case kIemNativeWhat_Invalid:
4644 case kIemNativeWhat_End:
4645 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4646 }
4647 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4648 }
4649 }
4650
4651 /*
4652 * Do the actual freeing.
4653 */
4654 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4655 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4656 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4657 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4658
4659 /* If there are guest register shadows in any call-volatile register, we
4660       have to clear the corresponding guest register masks for each register. */
4661 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4662 if (fHstSimdRegsWithGstShadow)
4663 {
4664 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4665 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4666 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4667 do
4668 {
4669 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4670 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4671
4672 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4673
4674#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4675 /*
4676 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4677 * to call volatile registers).
4678 */
4679 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4680 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4681 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4682#endif
4683 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4684 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4685
4686 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4687 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4688 } while (fHstSimdRegsWithGstShadow != 0);
4689 }
4690
4691 return off;
4692}
4693#endif
4694
4695
4696/**
4697 * Called right before emitting a call instruction to move anything important
4698 * out of call-volatile registers, free and flush the call-volatile registers,
4699 * optionally freeing argument variables.
4700 *
4701 * @returns New code buffer offset, UINT32_MAX on failure.
4702 * @param pReNative The native recompile state.
4703 * @param off The code buffer offset.
4704 * @param cArgs The number of arguments the function call takes.
4705 * It is presumed that the host register part of these has
4706 * been allocated as such already and won't need moving,
4707 * just freeing.
4708 * @param fKeepVars Mask of variables that should keep their register
4709 * assignments. Caller must take care to handle these.
4710 */
4711DECL_HIDDEN_THROW(uint32_t)
4712iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4713{
4714 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4715
4716 /* fKeepVars will reduce this mask. */
4717 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK;
4718
4719#ifdef RT_ARCH_ARM64
4720AssertCompile(IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK == UINT32_C(0x37fff));
4721#endif
4722
4723 /*
4724 * Move anything important out of volatile registers.
4725 */
4726 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4727 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4728 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK
4729#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4730 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4731#endif
4732 & ~g_afIemNativeCallRegs[cArgs];
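    /* Note: registers already allocated for the call arguments (g_afIemNativeCallRegs[cArgs])
       are excluded from the move set; per the function docs they only need freeing, which
       happens via fRegsToFree below. */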
4733
4734 fRegsToMove &= pReNative->Core.bmHstRegs;
4735 if (!fRegsToMove)
4736 { /* likely */ }
4737 else
4738 {
4739 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4740 while (fRegsToMove != 0)
4741 {
4742 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4743 fRegsToMove &= ~RT_BIT_32(idxReg);
4744
4745 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4746 {
4747 case kIemNativeWhat_Var:
4748 {
4749 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4750 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4751 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4752 Assert(pVar->idxReg == idxReg);
4753#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4754 Assert(!pVar->fSimdReg);
4755#endif
4756 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4757 {
4758 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4759 idxVar, pVar->enmKind, pVar->idxReg));
4760 if (pVar->enmKind != kIemNativeVarKind_Stack)
4761 pVar->idxReg = UINT8_MAX;
4762 else
4763 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4764 }
4765 else
4766 fRegsToFree &= ~RT_BIT_32(idxReg);
4767 continue;
4768 }
4769
4770 case kIemNativeWhat_Arg:
4771 AssertMsgFailed(("What?!?: %u\n", idxReg));
4772 continue;
4773
4774 case kIemNativeWhat_rc:
4775 case kIemNativeWhat_Tmp:
4776 AssertMsgFailed(("Missing free: %u\n", idxReg));
4777 continue;
4778
4779 case kIemNativeWhat_FixedTmp:
4780 case kIemNativeWhat_pVCpuFixed:
4781 case kIemNativeWhat_pCtxFixed:
4782 case kIemNativeWhat_PcShadow:
4783 case kIemNativeWhat_FixedReserved:
4784 case kIemNativeWhat_Invalid:
4785 case kIemNativeWhat_End:
4786 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4787 }
4788 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4789 }
4790 }
4791
4792 /*
4793 * Do the actual freeing.
4794 */
4795 if (pReNative->Core.bmHstRegs & fRegsToFree)
4796 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4797 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4798 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4799
4800 /* If there are guest register shadows in any call-volatile register, we
4801       have to clear the corresponding guest register masks for each register. */
4802 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4803 if (fHstRegsWithGstShadow)
4804 {
4805 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4806 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
4807 fHstRegsWithGstShadow));
4808 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4809 do
4810 {
4811 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4812 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4813
4814 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4815
4816#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4817 /*
4818 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4819 * to call volatile registers).
4820 */
4821 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4822 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4823 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4824#endif
4825
4826 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4827 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4828 } while (fHstRegsWithGstShadow != 0);
4829 }
4830
4831#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4832 /* Now for the SIMD registers, no argument support for now. */
4833 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4834#endif
4835
4836 return off;
4837}
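/* Usage note (sketch): the call emission path (e.g. iemEmitCallCommon(), referenced in the
   delayed-writeback comments above) invokes this function right before emitting the actual
   call instruction, once the argument registers have been set up. */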
4838
4839
4840/**
4841 * Flushes a set of guest register shadow copies.
4842 *
4843 * This is usually done after calling a threaded function or a C-implementation
4844 * of an instruction.
4845 *
4846 * @param pReNative The native recompile state.
4847 * @param fGstRegs Set of guest registers to flush.
4848 */
4849DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4850{
4851 /*
4852 * Reduce the mask by what's currently shadowed
4853 */
4854 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4855 fGstRegs &= bmGstRegShadowsOld;
4856 if (fGstRegs)
4857 {
4858 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4859 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4860 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4861 if (bmGstRegShadowsNew)
4862 {
4863 /*
4864 * Partial.
4865 */
4866 do
4867 {
4868 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4869 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4870 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4871 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4872 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4873#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4874 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4875#endif
4876
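                /* Group the requested guest registers shadowed by this host register together
                   with the current one, so the whole group is removed from fGstRegs at once. */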
4877 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4878 fGstRegs &= ~fInThisHstReg;
4879 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4880 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4881 if (!fGstRegShadowsNew)
4882 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4883 } while (fGstRegs != 0);
4884 }
4885 else
4886 {
4887 /*
4888 * Clear all.
4889 */
4890 do
4891 {
4892 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4893 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4894 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4895 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4896 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4897#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4898 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4899#endif
4900
4901 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4902 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4903 } while (fGstRegs != 0);
4904 pReNative->Core.bmHstRegsWithGstShadow = 0;
4905 }
4906 }
4907}
4908
4909
4910/**
4911 * Flushes guest register shadow copies held by a set of host registers.
4912 *
4913 * This is used with the TLB lookup code for ensuring that we don't carry on
4914 * with any guest shadows in volatile registers, as these will get corrupted by
4915 * a TLB miss.
4916 *
4917 * @param pReNative The native recompile state.
4918 * @param fHstRegs Set of host registers to flush guest shadows for.
4919 */
4920DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4921{
4922 /*
4923 * Reduce the mask by what's currently shadowed.
4924 */
4925 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4926 fHstRegs &= bmHstRegsWithGstShadowOld;
4927 if (fHstRegs)
4928 {
4929 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4930 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4931 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4932 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4933 if (bmHstRegsWithGstShadowNew)
4934 {
4935 /*
4936 * Partial (likely).
4937 */
4938 uint64_t fGstShadows = 0;
4939 do
4940 {
4941 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4942 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4943 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4944 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4945#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4946 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4947#endif
4948
4949 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4950 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4951 fHstRegs &= ~RT_BIT_32(idxHstReg);
4952 } while (fHstRegs != 0);
4953 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4954 }
4955 else
4956 {
4957 /*
4958 * Clear all.
4959 */
4960 do
4961 {
4962 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4963 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4964 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4965 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4966#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4967 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4968#endif
4969
4970 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4971 fHstRegs &= ~RT_BIT_32(idxHstReg);
4972 } while (fHstRegs != 0);
4973 pReNative->Core.bmGstRegShadows = 0;
4974 }
4975 }
4976}
4977
4978
4979/**
4980 * Restores guest shadow copies in volatile registers.
4981 *
4982 * This is used after calling a helper function (think TLB miss) to restore the
4983 * register state of volatile registers.
4984 *
4985 * @param pReNative The native recompile state.
4986 * @param off The code buffer offset.
4987 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4988 * be active (allocated) w/o asserting. Hack.
4989 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4990 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4991 */
4992DECL_HIDDEN_THROW(uint32_t)
4993iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4994{
4995 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4996 if (fHstRegs)
4997 {
4998 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4999 do
5000 {
5001 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5002
5003            /* It's not fatal if a register is active holding a variable that is
5004               shadowing a guest register, ASSUMING all pending guest register
5005               writes were flushed prior to the helper call. However, we'll be
5006               emitting duplicate restores, so it wastes code space. */
5007 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5008 RT_NOREF(fHstRegsActiveShadows);
5009
5010 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5011#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5012 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
5013#endif
5014 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5015 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5016 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5017
5018 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5019 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5020
5021 fHstRegs &= ~RT_BIT_32(idxHstReg);
5022 } while (fHstRegs != 0);
5023 }
5024 return off;
5025}
5026
5027
5028
5029
5030/*********************************************************************************************************************************
5031* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5032*********************************************************************************************************************************/
5033#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5034
5035/**
5036 * Info about shadowed guest SIMD register values.
5037 * @see IEMNATIVEGSTSIMDREG
5038 */
5039static struct
5040{
5041 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5042 uint32_t offXmm;
5043 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5044 uint32_t offYmm;
5045 /** Name (for logging). */
5046 const char *pszName;
5047} const g_aGstSimdShadowInfo[] =
5048{
5049#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5050 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5051 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5052 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5053 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5054 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5055 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5056 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5057 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5058 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5059 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5060 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5061 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5062 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5063 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5064 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5065 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5066 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5067#undef CPUMCTX_OFF_AND_SIZE
5068};
5069AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
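/* Example: g_aGstSimdShadowInfo[3].offXmm is the byte offset of the guest XMM3 register
   within VMCPU and .offYmm that of the YMM3 high half; the iemNativeEmitSimdStoreVecRegToVCpu*
   emitters below take these offsets when flushing dirty shadow copies. */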
5070
5071
5072/**
5073 * Frees a temporary SIMD register.
5074 *
5075 * Any shadow copies of guest registers assigned to the host register will not
5076 * be flushed by this operation.
5077 */
5078DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5079{
5080 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5081 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5082 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5083 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5084 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5085}
5086
5087
5088/**
5089 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
5090 *
5091 * @returns New code buffer offset.
5092 * @param pReNative The native recompile state.
5093 * @param off Current code buffer position.
5094 * @param enmGstSimdReg The guest SIMD register to flush.
5095 */
5096DECL_HIDDEN_THROW(uint32_t)
5097iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5098{
5099 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5100
5101 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5102 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5103 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5104 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5105
5106 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5107 {
5108 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5109 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5110 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5111 }
5112
5113 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5114 {
5115 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5116 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5117 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5118 }
5119
5120 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5121 return off;
5122}
5123
5124
5125/**
5126 * Flush the given set of guest SIMD registers if marked as dirty.
5127 *
5128 * @returns New code buffer offset.
5129 * @param pReNative The native recompile state.
5130 * @param off Current code buffer position.
5131 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
5132 */
5133DECL_HIDDEN_THROW(uint32_t)
5134iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
5135{
5136 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5137 & fFlushGstSimdReg;
5138 if (bmGstSimdRegShadowDirty)
5139 {
5140# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5141 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5142 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5143# endif
5144
5145 do
5146 {
5147 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5148 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5149 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5150 } while (bmGstSimdRegShadowDirty);
5151 }
5152
5153 return off;
5154}
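/* Usage note (sketch): callers may pass RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iSimdReg)) as
   fFlushGstSimdReg to flush a single guest SIMD register; the UINT64_MAX default flushes
   every dirty one. */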
5155
5156
5157#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5158/**
5159 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
5160 *
5161 * @returns New code buffer offset.
5162 * @param pReNative The native recompile state.
5163 * @param off Current code buffer position.
5164 * @param idxHstSimdReg The host SIMD register.
5165 *
5166 * @note This doesn't do any unshadowing of guest registers from the host register.
5167 */
5168DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
5169{
5170 /* We need to flush any pending guest register writes this host register shadows. */
5171 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5172 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5173 if (bmGstSimdRegShadowDirty)
5174 {
5175# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5176 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5177 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5178# endif
5179
5180 do
5181 {
5182 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5183 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5184 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5185 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5186 } while (bmGstSimdRegShadowDirty);
5187 }
5188
5189 return off;
5190}
5191#endif
5192
5193
5194/**
5195 * Locate a register, possibly freeing one up.
5196 *
5197 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5198 * failed.
5199 *
5200 * @returns Host register number on success. Returns UINT8_MAX if no register
5201 * was found; the caller is supposed to deal with this and raise an
5202 * allocation type specific status code (if desired).
5203 *
5204 * @throws VBox status code if we run into trouble spilling a variable or
5205 * recording debug info. Does NOT throw anything if we're out of
5206 * registers, though.
5207 */
5208static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5209 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5210{
5211 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5212 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5213 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5214
5215 /*
5216 * Try a freed register that's shadowing a guest register.
5217 */
5218 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5219 if (fRegs)
5220 {
5221 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5222
5223#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5224 /*
5225 * When we have liveness information, we use it to kick out all shadowed
5226 * guest registers that will not be needed any more in this TB. If we're
5227 * lucky, this may prevent us from ending up here again.
5228 *
5229 * Note! We must consider the previous entry here so we don't free
5230 * anything that the current threaded function requires (current
5231 * entry is produced by the next threaded function).
5232 */
5233 uint32_t const idxCurCall = pReNative->idxCurCall;
5234 if (idxCurCall > 0)
5235 {
5236 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5237 uint64_t const fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
5238
5239 /* If it matches any shadowed registers. */
5240 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5241 {
5242 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5243 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5244 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5245
5246 /* See if we've got any unshadowed registers we can return now. */
5247 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5248 if (fUnshadowedRegs)
5249 {
5250 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
5251 return (fPreferVolatile
5252 ? ASMBitFirstSetU32(fUnshadowedRegs)
5253 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5254 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5255 - 1;
5256 }
5257 }
5258 }
5259#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5260
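        /* Selection: with fPreferVolatile the lowest set bit of fRegs is taken; otherwise the
           non-volatile (call-saved) subset is preferred, taking its highest set bit and falling
           back to fRegs as a whole if that subset is empty. */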
5261 unsigned const idxReg = (fPreferVolatile
5262 ? ASMBitFirstSetU32(fRegs)
5263 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5264 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5265 - 1;
5266
5267 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5268 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5269 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5270 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5271
5272 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5273 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
5274
5275 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5276 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5277 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5278 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5279 return idxReg;
5280 }
5281
5282 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5283
5284 /*
5285 * Try free up a variable that's in a register.
5286 *
5287 * We do two rounds here, first evacuating variables that don't need to be
5288 * saved on the stack, then in the second round moving things to the stack.
5289 */
5290 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5291 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5292 {
5293 uint32_t fVars = pReNative->Core.bmVars;
5294 while (fVars)
5295 {
5296 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5297 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5298            if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
5299            {   fVars &= ~RT_BIT_32(idxVar); continue; } /* Must clear the bit here, as 'continue' skips the clearing at the bottom of the loop. */
5300
5301 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5302 && (RT_BIT_32(idxReg) & fRegMask)
5303 && ( iLoop == 0
5304 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5305 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5306 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5307 {
5308 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5309 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5310 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5311 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5312 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5313 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5314
5315 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5316 {
5317 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5318                    *poff = pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) /* Spill using the SIMD store emitters, not the GPR one. */
                          ? iemNativeEmitStoreVecRegByBpU128(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg)
                          : iemNativeEmitStoreVecRegByBpU256(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5319 }
5320
5321 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5322 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5323
5324 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5325                pReNative->Core.bmGstSimdRegShadows        &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5326 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5327 return idxReg;
5328 }
5329 fVars &= ~RT_BIT_32(idxVar);
5330 }
5331 }
5332
5333 AssertFailed();
5334 return UINT8_MAX;
5335}
5336
5337
5338/**
5339 * Flushes a set of guest register shadow copies.
5340 *
5341 * This is usually done after calling a threaded function or a C-implementation
5342 * of an instruction.
5343 *
5344 * @param pReNative The native recompile state.
5345 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5346 */
5347DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5348{
5349 /*
5350 * Reduce the mask by what's currently shadowed
5351 */
5352 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5353 fGstSimdRegs &= bmGstSimdRegShadows;
5354 if (fGstSimdRegs)
5355 {
5356 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5357 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5358 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5359 if (bmGstSimdRegShadowsNew)
5360 {
5361 /*
5362 * Partial.
5363 */
5364 do
5365 {
5366 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5367 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5368 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5369 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5370 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5371 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5372
5373 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5374 fGstSimdRegs &= ~fInThisHstReg;
5375 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5376 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5377 if (!fGstRegShadowsNew)
5378 {
5379 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5380 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5381 }
5382 } while (fGstSimdRegs != 0);
5383 }
5384 else
5385 {
5386 /*
5387 * Clear all.
5388 */
5389 do
5390 {
5391 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5392 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5393 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5394 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5395 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5396 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5397
5398 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5399 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5400 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5401 } while (fGstSimdRegs != 0);
5402 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5403 }
5404 }
5405}
5406
5407
5408/**
5409 * Allocates a temporary host SIMD register.
5410 *
5411 * This may emit code to save register content onto the stack in order to free
5412 * up a register.
5413 *
5414 * @returns The host register number; throws VBox status code on failure,
5415 * so no need to check the return value.
5416 * @param pReNative The native recompile state.
5417 * @param poff Pointer to the variable with the code buffer position.
5418 * This will be update if we need to move a variable from
5419 * register to stack in order to satisfy the request.
5420 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5421 * registers (@c true, default) or the other way around
5422 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5423 */
5424DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5425{
5426 /*
5427 * Try find a completely unused register, preferably a call-volatile one.
5428 */
5429 uint8_t idxSimdReg;
5430    uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5431                   & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5432 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5433 if (fRegs)
5434 {
5435 if (fPreferVolatile)
5436 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5437 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5438 else
5439 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5440 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5441 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5442 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5443
5444 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5445 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5446 }
5447 else
5448 {
5449 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5450 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5451 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5452 }
5453
5454 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5455 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5456}
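/* Illustrative pairing (sketch):
       uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
       ... emit SIMD code using host register idxSimdTmp ...
       iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
   The allocation may itself emit spill code, which is why the code buffer position is
   passed by pointer. */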
5457
5458
5459/**
5460 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5461 * registers.
5462 *
5463 * @returns The host register number; throws VBox status code on failure,
5464 * so no need to check the return value.
5465 * @param pReNative The native recompile state.
5466 * @param poff Pointer to the variable with the code buffer position.
5467 * This will be updated if we need to move a variable from
5468 * register to stack in order to satisfy the request.
5469 * @param fRegMask Mask of acceptable registers.
5470 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5471 * registers (@c true, default) or the other way around
5472 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5473 */
5474DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5475 bool fPreferVolatile /*= true*/)
5476{
5477 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5478 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5479
5480 /*
5481 * Try find a completely unused register, preferably a call-volatile one.
5482 */
5483 uint8_t idxSimdReg;
5484 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5485 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5486 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5487 & fRegMask;
5488 if (fRegs)
5489 {
5490 if (fPreferVolatile)
5491 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5492 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5493 else
5494 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5495 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5496 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5497 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5498
5499 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5500 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5501 }
5502 else
5503 {
5504 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5505 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5506 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5507 }
5508
5509 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5510 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5511}
5512
5513
5514/**
5515 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5516 *
5517 * @param pReNative The native recompile state.
5518 * @param idxHstSimdReg The host SIMD register to update the state for.
5519 * @param enmLoadSz The load size to set.
5520 */
5521DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5522 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5523{
5524 /* Everything valid already? -> nothing to do. */
5525 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5526 return;
5527
5528 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5529 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5530 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5531 {
5532 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5533 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5534 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5535 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5536 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5537 }
5538}
5539
5540
5541static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5542 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5543{
5544 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5545 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5546 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5547 {
5548# ifdef RT_ARCH_ARM64
5549 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5550 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5551# endif
5552
5553 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5554 {
5555 switch (enmLoadSzDst)
5556 {
5557 case kIemNativeGstSimdRegLdStSz_256:
5558 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5559 break;
5560 case kIemNativeGstSimdRegLdStSz_Low128:
5561 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5562 break;
5563 case kIemNativeGstSimdRegLdStSz_High128:
5564 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5565 break;
5566 default:
5567 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5568 }
5569
5570 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5571 }
5572 }
5573 else
5574 {
5575        /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5576 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5577 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5578 }
5579
5580 return off;
5581}
5582
5583
5584/**
5585 * Allocates a temporary host SIMD register for keeping a guest
5586 * SIMD register value.
5587 *
5588 * We may already have a host register holding the guest register value; if not,
5589 * code will be emitted to do the loading. Code may also be emitted if we have to
5590 * free up a register to satisfy the request.
5591 *
5592 * @returns The host register number; throws VBox status code on failure, so no
5593 * need to check the return value.
5594 * @param pReNative The native recompile state.
5595 * @param poff Pointer to the variable with the code buffer
5596 *                      position. This will be updated if we need to move a
5597 * variable from register to stack in order to satisfy
5598 * the request.
5599 * @param   enmGstSimdReg   The guest SIMD register that is to be updated.
 * @param   enmLoadSz       Which part of the register to load (low 128 bits, high 128 bits or the full 256 bits).
5600 * @param enmIntendedUse How the caller will be using the host register.
5601 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5602 * register is okay (default). The ASSUMPTION here is
5603 * that the caller has already flushed all volatile
5604 * registers, so this is only applied if we allocate a
5605 * new register.
5606 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5607 */
5608DECL_HIDDEN_THROW(uint8_t)
5609iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5610 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5611 bool fNoVolatileRegs /*= false*/)
5612{
5613 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5614#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5615 AssertMsg( pReNative->idxCurCall == 0
5616 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5617 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5618 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5619 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5620 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5621 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5622#endif
5623#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5624 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5625#endif
5626 uint32_t const fRegMask = !fNoVolatileRegs
5627 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5628 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5629
5630 /*
5631 * First check if the guest register value is already in a host register.
5632 */
5633 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5634 {
5635 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5636 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5637 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5638 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5639
5640 /* It's not supposed to be allocated... */
5641 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5642 {
5643 /*
5644             * If the register will trash the guest shadow copy, try to find a
5645 * completely unused register we can use instead. If that fails,
5646 * we need to disassociate the host reg from the guest reg.
5647 */
5648 /** @todo would be nice to know if preserving the register is in any way helpful. */
5649            /* If the purpose is calculations, try to duplicate the register value as
5650 we'll be clobbering the shadow. */
5651 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5652 && ( ~pReNative->Core.bmHstSimdRegs
5653 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5654 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5655 {
5656 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5657
5658 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5659
5660 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5661 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5662 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5663 idxSimdReg = idxRegNew;
5664 }
5665 /* If the current register matches the restrictions, go ahead and allocate
5666 it for the caller. */
5667 else if (fRegMask & RT_BIT_32(idxSimdReg))
5668 {
5669 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5670 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5671 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5672 {
5673 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5674 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5675 else
5676 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5677 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5678 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5679 }
5680 else
5681 {
5682 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5683 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5684 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5685 }
5686 }
5687 /* Otherwise, allocate a register that satisfies the caller and transfer
5688 the shadowing if compatible with the intended use. (This basically
5689               means the caller wants a non-volatile register (RSP push/pop scenario).) */
5690 else
5691 {
5692 Assert(fNoVolatileRegs);
5693 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5694 !fNoVolatileRegs
5695 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5696 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5697 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5698 {
5699 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5700                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5701 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5702 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5703 }
5704 else
5705 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5706 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5707 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5708 idxSimdReg = idxRegNew;
5709 }
5710 }
5711 else
5712 {
5713 /*
5714 * Oops. Shadowed guest register already allocated!
5715 *
5716 * Allocate a new register, copy the value and, if updating, the
5717 * guest shadow copy assignment to the new register.
5718 */
5719 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5720 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5721 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5722 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5723
5724 /** @todo share register for readonly access. */
5725 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5726 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5727
5728 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5729 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5730 else
5731 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5732
5733 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5734 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5735 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5736 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5737 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5738 else
5739 {
5740 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5741 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5742 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5743 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5744 }
5745 idxSimdReg = idxRegNew;
5746 }
5747 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5748
5749#ifdef VBOX_STRICT
5750 /* Strict builds: Check that the value is correct. */
5751 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5752 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5753#endif
5754
5755 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5756 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5757 {
5758# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5759 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5760 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5761# endif
5762
5763 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5764 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5765 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5766 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5767 else
5768 {
5769 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5770 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5771 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5772 }
5773 }
5774
5775 return idxSimdReg;
5776 }
5777
5778 /*
5779     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5780 */
5781 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5782
5783 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5784 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5785 else
5786 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5787
5788 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5789 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5790
5791 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5792 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5793 {
5794# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5795 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5796 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5797# endif
5798
5799 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5800 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5801 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5802 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5803 else
5804 {
5805 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5806 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5807 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5808 }
5809 }
5810
5811    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5812 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5813
5814 return idxRegNew;
5815}
5816
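/*
 * Typical calling pattern, sketched for illustration only (the caller and the exact
 * cleanup step are hypothetical; releasing the register again is not shown here):
 *
 *     uint8_t const idxHstSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, enmGstSimdReg,
 *                                                                           kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                           kIemNativeGstRegUse_ForUpdate);
 *     ... emit code reading and modifying the low 128 bits of idxHstSimdReg ...
 *
 * With ForUpdate the low half is marked dirty above, so the modified value reaches
 * CPUMCTX later through the delayed SIMD register writeback rather than an explicit
 * store at this point.
 */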
5817
5818/**
5819 * Flushes guest SIMD register shadow copies held by a set of host registers.
5820 *
5821 * This is used when calling an external helper, to ensure that we don't carry on
5822 * with any guest shadows in volatile registers, as these will be corrupted by the callee.
5823 *
5824 * @param pReNative The native recompile state.
5825 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5826 */
5827DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5828{
5829 /*
5830 * Reduce the mask by what's currently shadowed.
5831 */
5832 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5833 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5834 if (fHstSimdRegs)
5835 {
5836 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5837 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5838 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5839 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5840 if (bmHstSimdRegsWithGstShadowNew)
5841 {
5842 /*
5843 * Partial (likely).
5844 */
5845 uint64_t fGstShadows = 0;
5846 do
5847 {
5848 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5849 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5850 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5851 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5852 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5853 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5854
5855 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5856 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5857 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5858 } while (fHstSimdRegs != 0);
5859 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5860 }
5861 else
5862 {
5863 /*
5864 * Clear all.
5865 */
5866 do
5867 {
5868 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5869 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5870 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5871 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5872 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5873 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5874
5875 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5876 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5877 } while (fHstSimdRegs != 0);
5878 pReNative->Core.bmGstSimdRegShadows = 0;
5879 }
5880 }
5881}
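
/*
 * Illustrative call pattern (hypothetical, not taken from this file): before emitting a
 * call to an external helper, the recompiler would drop all guest shadows living in
 * call-volatile SIMD registers, since the helper is free to clobber those:
 *
 *     iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 *
 * Only the shadowing bookkeeping is dropped; the assertions above ensure no dirty guest
 * values are lost, so any pending writebacks must have happened beforehand.
 */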
5882#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5883
5884
5885
5886/*********************************************************************************************************************************
5887* Code emitters for flushing pending guest register writes and sanity checks *
5888*********************************************************************************************************************************/
5889
5890#ifdef VBOX_STRICT
5891/**
5892 * Does internal register allocator sanity checks.
5893 */
5894DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5895{
5896 /*
5897 * Iterate host registers building a guest shadowing set.
5898 */
5899 uint64_t bmGstRegShadows = 0;
5900 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5901 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5902 while (bmHstRegsWithGstShadow)
5903 {
5904 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5905 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5906 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5907
5908 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5909 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5910 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5911 bmGstRegShadows |= fThisGstRegShadows;
5912 while (fThisGstRegShadows)
5913 {
5914 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5915 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5916 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5917 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5918 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5919 }
5920 }
5921 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5922 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5923 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5924
5925 /*
5926 * Now the other way around, checking the guest to host index array.
5927 */
5928 bmHstRegsWithGstShadow = 0;
5929 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5930 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5931 while (bmGstRegShadows)
5932 {
5933 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5934 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5935 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5936
5937 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5938 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5939 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5940 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5941 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5942 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5943 }
5944 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5945 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5946 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5947}
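
/*
 * In compact form, the invariant verified above (sketch, not compiled):
 *
 *     bmGstRegShadows == OR of aHstRegs[i].fGstRegShadows over all i in bmHstRegsWithGstShadow
 *     and, for every guest register g set in bmGstRegShadows:
 *         (aHstRegs[aidxGstRegShadows[g]].fGstRegShadows & RT_BIT_64(g)) != 0
 *
 * i.e. the host-to-guest and guest-to-host shadow mappings must describe exactly the
 * same relation, with no stale or dangling entries on either side.
 */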
5948#endif /* VBOX_STRICT */
5949
5950
5951/**
5952 * Flushes any delayed guest register writes.
5953 *
5954 * This must be called prior to calling CImpl functions and any helpers that use
5955 * the guest state (like raising exceptions) and such.
5956 *
5957 * @note This function does not flush any guest register shadowing information; that is
5958 *       left to the caller if it wishes to do so.
5959 */
5960DECL_HIDDEN_THROW(uint32_t)
5961iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5962{
5963#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5964 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5965 off = iemNativeEmitPcWriteback(pReNative, off);
5966#else
5967 RT_NOREF(pReNative, fGstShwExcept);
5968#endif
5969
5970#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5971 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5972#endif
5973
5974#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5975 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5976#endif
5977
5978 return off;
5979}
5980
5981#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5982
5983# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5984
5985/**
5986 * Checks if the value in @a idxPcReg matches IEMCPU::uPcUpdatingDebug.
5987 */
5988DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheckWithReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxPcReg)
5989{
5990 Assert(idxPcReg != IEMNATIVE_REG_FIXED_TMP0);
5991 Assert(pReNative->Core.fDebugPcInitialized);
5992
5993 /* cmp [pVCpu->iem.s.uPcUpdatingDebug], pcreg */
5994# ifdef RT_ARCH_AMD64
5995 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5996 pCodeBuf[off++] = X86_OP_REX_W | (idxPcReg >= 8 ? X86_OP_REX_R : 0);
5997 pCodeBuf[off++] = 0x3b;
5998 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxPcReg & 7, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5999# else
6000 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6001 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
6002 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, idxPcReg);
6003# endif
6004
6005 uint32_t offFixup = off;
6006 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, kIemNativeInstrCond_e);
6007 off = iemNativeEmitBrkEx(pCodeBuf, off, UINT32_C(0x2200));
6008 iemNativeFixupFixedJump(pReNative, offFixup, off);
6009
6010 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6011 return off;
6012}
6013
6014
6015/**
6016 * Checks that the current RIP+offPc matches IEMCPU::uPcUpdatingDebug.
6017 */
6018DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6019{
6020 if (pReNative->Core.fDebugPcInitialized)
6021 {
6022 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc);
6023 if (pReNative->Core.offPc)
6024 {
6025 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6026 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 8);
6027 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxTmpReg, idxPcReg, pReNative->Core.offPc);
6028 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6029 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxTmpReg);
6030 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6031 }
6032 else
6033 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
6034 iemNativeRegFreeTmp(pReNative, idxPcReg);
6035 }
6036 return off;
6037}
6038
6039# endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG */
6040
6041/**
6042 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
6043 */
6044DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6045{
6046 Assert(pReNative->Core.offPc);
6047# if !defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && !defined(VBOX_WITH_STATISTICS)
6048 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x\n", pReNative->Core.offPc, off));
6049# else
6050 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
6051 uint8_t idxCurCall = pReNative->idxCurCall;
6052 uint8_t idxInstr = pReNative->pTbOrg->Thrd.paCalls[idxCurCall].idxInstr; /* unreliable*/
6053 while (idxInstr == 0 && idxInstr + 1 < idxOldInstrPlusOne && idxCurCall > 0)
6054 idxInstr = pReNative->pTbOrg->Thrd.paCalls[--idxCurCall].idxInstr;
6055 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
6056 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
6057 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u\n",
6058 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped));
6059
6060 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
6061
6062# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6063 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6064 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
6065# endif
6066# endif
6067
6068# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6069 /* Allocate a temporary PC register. */
6070 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6071
6072 /* Perform the addition and store the result. */
6073 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6074 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6075# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6076 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
6077# endif
6078
6079 /* Free but don't flush the PC register. */
6080 iemNativeRegFreeTmp(pReNative, idxPcReg);
6081# else
6082 /* Compare the shadow with the context value, they should match. */
6083 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6084 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6085# endif
6086
6087 pReNative->Core.offPc = 0;
6088
6089 return off;
6090}
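
/*
 * Conceptually (illustration only), the delayed PC updating emits the equivalent of
 *
 *     pVCpu->cpum.GstCtx.rip += pReNative->Core.offPc;
 *
 * once, instead of storing RIP after every recompiled instruction; offPc accumulates
 * instruction lengths until something forces a writeback (a branch, a helper call,
 * raising an exception, ...), at which point this function emits the add + store and
 * resets offPc to zero.
 */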
6091
6092#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6093
6094
6095/*********************************************************************************************************************************
6096* Code Emitters (larger snippets) *
6097*********************************************************************************************************************************/
6098
6099/**
6100 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6101 * extending to 64-bit width.
6102 *
6103 * @returns New code buffer offset on success, UINT32_MAX on failure.
6104 * @param   pReNative   The native recompile state.
6105 * @param off The current code buffer position.
6106 * @param idxHstReg The host register to load the guest register value into.
6107 * @param enmGstReg The guest register to load.
6108 *
6109 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6110 * that is something the caller needs to do if applicable.
6111 */
6112DECL_HIDDEN_THROW(uint32_t)
6113iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6114{
6115 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6116 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6117
6118 switch (g_aGstShadowInfo[enmGstReg].cb)
6119 {
6120 case sizeof(uint64_t):
6121 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6122 case sizeof(uint32_t):
6123 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6124 case sizeof(uint16_t):
6125 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6126#if 0 /* not present in the table. */
6127 case sizeof(uint8_t):
6128 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6129#endif
6130 default:
6131 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6132 }
6133}
6134
6135
6136/**
6137 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6138 * extending to 64-bit width, extended version.
6139 *
6140 * @returns New code buffer offset on success, UINT32_MAX on failure.
6141 * @param pCodeBuf The code buffer.
6142 * @param off The current code buffer position.
6143 * @param idxHstReg The host register to load the guest register value into.
6144 * @param enmGstReg The guest register to load.
6145 *
6146 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6147 * that is something the caller needs to do if applicable.
6148 */
6149DECL_HIDDEN_THROW(uint32_t)
6150iemNativeEmitLoadGprWithGstShadowRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6151{
6152 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6153 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6154
6155 switch (g_aGstShadowInfo[enmGstReg].cb)
6156 {
6157 case sizeof(uint64_t):
6158 return iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6159 case sizeof(uint32_t):
6160 return iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6161 case sizeof(uint16_t):
6162 return iemNativeEmitLoadGprFromVCpuU16Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6163#if 0 /* not present in the table. */
6164 case sizeof(uint8_t):
6165 return iemNativeEmitLoadGprFromVCpuU8Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6166#endif
6167 default:
6168#ifdef IEM_WITH_THROW_CATCH
6169 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6170#else
6171 AssertReleaseFailedReturn(off);
6172#endif
6173 }
6174}
6175
6176
6177#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6178/**
6179 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6180 *
6181 * @returns New code buffer offset on success, UINT32_MAX on failure.
6182 * @param pReNative The recompiler state.
6183 * @param off The current code buffer position.
6184 * @param idxHstSimdReg The host register to load the guest register value into.
6185 * @param enmGstSimdReg The guest register to load.
6186 * @param enmLoadSz The load size of the register.
6187 *
6188 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6189 * that is something the caller needs to do if applicable.
6190 */
6191DECL_HIDDEN_THROW(uint32_t)
6192iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6193 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6194{
6195 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6196
6197 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6198 switch (enmLoadSz)
6199 {
6200 case kIemNativeGstSimdRegLdStSz_256:
6201 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6202 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6203 case kIemNativeGstSimdRegLdStSz_Low128:
6204 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6205 case kIemNativeGstSimdRegLdStSz_High128:
6206 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6207 default:
6208 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6209 }
6210}
6211#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6212
6213#ifdef VBOX_STRICT
6214
6215/**
6216 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6217 *
6218 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6219 * Trashes EFLAGS on AMD64.
6220 */
6221DECL_FORCE_INLINE(uint32_t)
6222iemNativeEmitTop32BitsClearCheckEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxReg)
6223{
6224# ifdef RT_ARCH_AMD64
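    /* Strategy: rotate the high dword into the low half, test it, trap with int3 if any
       bit was set, then rotate back so idxReg ends up unchanged (only EFLAGS is trashed,
       as noted in the function description). */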
6225 /* rol reg64, 32 */
6226 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6227 pCodeBuf[off++] = 0xc1;
6228 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6229 pCodeBuf[off++] = 32;
6230
6231 /* test reg32, ffffffffh */
6232 if (idxReg >= 8)
6233 pCodeBuf[off++] = X86_OP_REX_B;
6234 pCodeBuf[off++] = 0xf7;
6235 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6236 pCodeBuf[off++] = 0xff;
6237 pCodeBuf[off++] = 0xff;
6238 pCodeBuf[off++] = 0xff;
6239 pCodeBuf[off++] = 0xff;
6240
6241 /* je/jz +1 */
6242 pCodeBuf[off++] = 0x74;
6243 pCodeBuf[off++] = 0x01;
6244
6245 /* int3 */
6246 pCodeBuf[off++] = 0xcc;
6247
6248 /* rol reg64, 32 */
6249 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6250 pCodeBuf[off++] = 0xc1;
6251 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6252 pCodeBuf[off++] = 32;
6253
6254# elif defined(RT_ARCH_ARM64)
6255 /* lsr tmp0, reg64, #32 */
6256 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6257 /* cbz tmp0, +1 */
6258 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6259 /* brk #0x1100 */
6260 pCodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6261
6262# else
6263# error "Port me!"
6264# endif
6265 return off;
6266}
6267
6268
6269/**
6270 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6271 *
6272 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6273 * Trashes EFLAGS on AMD64.
6274 */
6275DECL_HIDDEN_THROW(uint32_t)
6276iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6277{
6278# ifdef RT_ARCH_AMD64
6279 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6280# elif defined(RT_ARCH_ARM64)
6281 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6282# else
6283# error "Port me!"
6284# endif
6285 off = iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6286 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6287 return off;
6288}
6289
6290
6291/**
6292 * Emitting code that checks that the content of register @a idxReg is the same
6293 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6294 * instruction if that's not the case.
6295 *
6296 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6297 * Trashes EFLAGS on AMD64.
6298 */
6299DECL_HIDDEN_THROW(uint32_t) iemNativeEmitGuestRegValueCheckEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf,
6300 uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6301{
6302#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6303    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6304 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
6305 return off;
6306#endif
6307
6308# ifdef RT_ARCH_AMD64
6309 /* cmp reg, [mem] */
6310 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6311 {
6312 if (idxReg >= 8)
6313 pCodeBuf[off++] = X86_OP_REX_R;
6314 pCodeBuf[off++] = 0x38;
6315 }
6316 else
6317 {
6318 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6319 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6320 else
6321 {
6322 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6323 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6324 else
6325 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6326 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6327 if (idxReg >= 8)
6328 pCodeBuf[off++] = X86_OP_REX_R;
6329 }
6330 pCodeBuf[off++] = 0x39;
6331 }
6332 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6333
6334 /* je/jz +1 */
6335 pCodeBuf[off++] = 0x74;
6336 pCodeBuf[off++] = 0x01;
6337
6338 /* int3 */
6339 pCodeBuf[off++] = 0xcc;
6340
6341 /* For values smaller than the register size, we must check that the rest
6342 of the register is all zeros. */
6343 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6344 {
6345 /* test reg64, imm32 */
6346 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6347 pCodeBuf[off++] = 0xf7;
6348 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6349 pCodeBuf[off++] = 0;
6350 pCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6351 pCodeBuf[off++] = 0xff;
6352 pCodeBuf[off++] = 0xff;
6353
6354 /* je/jz +1 */
6355 pCodeBuf[off++] = 0x74;
6356 pCodeBuf[off++] = 0x01;
6357
6358 /* int3 */
6359 pCodeBuf[off++] = 0xcc;
6360 }
6361 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6362 iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6363 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6364
6365# elif defined(RT_ARCH_ARM64)
6366 /* mov TMP0, [gstreg] */
6367 off = iemNativeEmitLoadGprWithGstShadowRegEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6368
6369 /* sub tmp0, tmp0, idxReg */
6370 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6371 /* cbz tmp0, +2 */
6372 pCodeBuf[off++] = Armv8A64MkInstrCbz(2, IEMNATIVE_REG_FIXED_TMP0);
6373 /* brk #0x1000+enmGstReg */
6374 pCodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6375 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6376
6377# else
6378# error "Port me!"
6379# endif
6380 return off;
6381}
6382
6383
6384/**
6385 * Emitting code that checks that the content of register @a idxReg is the same
6386 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6387 * instruction if that's not the case.
6388 *
6389 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6390 * Trashes EFLAGS on AMD64.
6391 */
6392DECL_HIDDEN_THROW(uint32_t)
6393iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6394{
6395#ifdef RT_ARCH_AMD64
6396 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6397#elif defined(RT_ARCH_ARM64)
6398 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6399# else
6400# error "Port me!"
6401# endif
6402 return iemNativeEmitGuestRegValueCheckEx(pReNative, pCodeBuf, off, idxReg, enmGstReg);
6403}
6404
6405# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6406# ifdef RT_ARCH_AMD64
6407/**
6408 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
6409 */
6410DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6411{
6412 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6413 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6414 if (idxSimdReg >= 8)
6415 pbCodeBuf[off++] = X86_OP_REX_R;
6416 pbCodeBuf[off++] = 0x0f;
6417 pbCodeBuf[off++] = 0x38;
6418 pbCodeBuf[off++] = 0x29;
6419 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6420
6421 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6422 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6423 pbCodeBuf[off++] = X86_OP_REX_W
6424 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6425 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6426 pbCodeBuf[off++] = 0x0f;
6427 pbCodeBuf[off++] = 0x3a;
6428 pbCodeBuf[off++] = 0x16;
6429 pbCodeBuf[off++] = 0xeb;
6430 pbCodeBuf[off++] = 0x00;
6431
6432 /* cmp tmp0, 0xffffffffffffffff. */
6433 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6434 pbCodeBuf[off++] = 0x83;
6435 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6436 pbCodeBuf[off++] = 0xff;
6437
6438 /* je/jz +1 */
6439 pbCodeBuf[off++] = 0x74;
6440 pbCodeBuf[off++] = 0x01;
6441
6442 /* int3 */
6443 pbCodeBuf[off++] = 0xcc;
6444
6445 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6446 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6447 pbCodeBuf[off++] = X86_OP_REX_W
6448 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6449 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6450 pbCodeBuf[off++] = 0x0f;
6451 pbCodeBuf[off++] = 0x3a;
6452 pbCodeBuf[off++] = 0x16;
6453 pbCodeBuf[off++] = 0xeb;
6454 pbCodeBuf[off++] = 0x01;
6455
6456 /* cmp tmp0, 0xffffffffffffffff. */
6457 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6458 pbCodeBuf[off++] = 0x83;
6459 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6460 pbCodeBuf[off++] = 0xff;
6461
6462 /* je/jz +1 */
6463 pbCodeBuf[off++] = 0x74;
6464 pbCodeBuf[off++] = 0x01;
6465
6466 /* int3 */
6467 pbCodeBuf[off++] = 0xcc;
6468
6469 return off;
6470}
6471# endif
6472
6473
6474/**
6475 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6476 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6477 * instruction if that's not the case.
6478 *
6479 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6480 * Trashes EFLAGS on AMD64.
6481 */
6482DECL_HIDDEN_THROW(uint32_t)
6483iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6484 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6485{
6486    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6487 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6488 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6489 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6490 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6491 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6492 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6493 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6494 return off;
6495
6496# ifdef RT_ARCH_AMD64
6497 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6498 {
6499 /* movdqa vectmp0, idxSimdReg */
6500 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6501
6502 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6503
6504 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6505 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6506 }
6507
6508 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6509 {
6510        /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6511 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6512
6513 /* vextracti128 vectmp0, idxSimdReg, 1 */
6514 pbCodeBuf[off++] = X86_OP_VEX3;
6515 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6516 | X86_OP_VEX3_BYTE1_X
6517 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6518 | 0x03; /* Opcode map */
6519 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6520 pbCodeBuf[off++] = 0x39;
6521 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6522 pbCodeBuf[off++] = 0x01;
6523
6524 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6525 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6526 }
6527# elif defined(RT_ARCH_ARM64)
6528 /* mov vectmp0, [gstreg] */
6529 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6530
6531 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6532 {
6533 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6534 /* eor vectmp0, vectmp0, idxSimdReg */
6535 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6536 /* uaddlv vectmp0, vectmp0.16B */
6537 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6538 /* umov tmp0, vectmp0.H[0] */
6539 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6540 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6541 /* cbz tmp0, +1 */
6542 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6543 /* brk #0x1000+enmGstReg */
6544 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6545 }
6546
6547 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6548 {
6549 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6550 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6551 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6552 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6553 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6554 /* umov tmp0, (vectmp0 + 1).H[0] */
6555 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6556 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6557 /* cbz tmp0, +1 */
6558 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6559 /* brk #0x1000+enmGstReg */
6560 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6561 }
6562
6563# else
6564# error "Port me!"
6565# endif
6566
6567 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6568 return off;
6569}
6570# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6571
6572
6573/**
6574 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6575 * important bits.
6576 *
6577 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6578 * Trashes EFLAGS on AMD64.
6579 */
6580DECL_HIDDEN_THROW(uint32_t)
6581iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6582{
6583 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6584 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6585 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6586 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6587
6588#ifdef RT_ARCH_AMD64
6589 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6590
6591 /* je/jz +1 */
6592 pbCodeBuf[off++] = 0x74;
6593 pbCodeBuf[off++] = 0x01;
6594
6595 /* int3 */
6596 pbCodeBuf[off++] = 0xcc;
6597
6598# elif defined(RT_ARCH_ARM64)
6599 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6600
6601 /* b.eq +1 */
6602 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6603 /* brk #0x2000 */
6604 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6605
6606# else
6607# error "Port me!"
6608# endif
6609 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6610
6611 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6612 return off;
6613}
6614
6615#endif /* VBOX_STRICT */
6616
6617
6618#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6619/**
6620 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6621 */
6622DECL_HIDDEN_THROW(uint32_t)
6623iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6624{
6625 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6626
6627 fEflNeeded &= X86_EFL_STATUS_BITS;
6628 if (fEflNeeded)
6629 {
6630# ifdef RT_ARCH_AMD64
6631 /* test dword [pVCpu + offVCpu], imm32 */
6632 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
6633 if (fEflNeeded <= 0xff)
6634 {
6635 pCodeBuf[off++] = 0xf6;
6636 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6637 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6638 }
6639 else
6640 {
6641 pCodeBuf[off++] = 0xf7;
6642 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6643 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6644 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6645 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6646 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6647 }
6648
6649 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 3, kIemNativeInstrCond_e);
6650 pCodeBuf[off++] = 0xcc;
6651
6652 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6653
6654# else
6655 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6656 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6657 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6658# ifdef RT_ARCH_ARM64
6659 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6660 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6661# else
6662# error "Port me!"
6663# endif
6664 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6665# endif
6666 }
6667 return off;
6668}
6669#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6670
6671
6672/**
6673 * Emits code for checking the return code of a call and rcPassUp, returning
6674 * from the code if either is non-zero.
6675 */
6676DECL_HIDDEN_THROW(uint32_t)
6677iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6678{
6679#ifdef RT_ARCH_AMD64
6680 /*
6681 * AMD64: eax = call status code.
6682 */
6683
6684 /* edx = rcPassUp */
6685 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6686# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6687 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6688# endif
6689
6690 /* edx = eax | rcPassUp */
6691 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6692 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6693 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6694 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6695
6696 /* Jump to non-zero status return path. */
6697 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, off);
6698
6699 /* done. */
6700
6701#elif RT_ARCH_ARM64
6702 /*
6703 * ARM64: w0 = call status code.
6704 */
6705 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+3+3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
6706
6707# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6708 AssertCompile(ARMV8_A64_REG_X2 == IEMNATIVE_CALL_ARG2_GREG);
6709 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, ARMV8_A64_REG_X2, idxInstr);
6710# endif
6711 off = iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6712
6713 pCodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6714
6715 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, pCodeBuf, off,
6716 ARMV8_A64_REG_X4, true /*f64Bit*/);
6717
6718#else
6719# error "port me"
6720#endif
6721 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6722 RT_NOREF_PV(idxInstr);
6723 return off;
6724}
6725
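/*
 * Roughly, the code emitted above amounts to (illustration only; rcStrictFromCall is a
 * placeholder for the 32-bit status returned by the preceding call):
 *
 *     if ((uint32_t)rcStrictFromCall | pVCpu->iem.s.rcPassUp)
 *         goto NonZeroRetOrPassUp;    (the kIemNativeLabelType_NonZeroRetOrPassUp TB exit)
 *
 * i.e. a single OR of the call status with rcPassUp followed by one conditional TB exit,
 * keeping the common all-zero path down to a handful of instructions.
 */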
6726
6727/**
6728 * Emits a call to a CImpl function or something similar.
6729 */
6730DECL_HIDDEN_THROW(uint32_t)
6731iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6732 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6733{
6734 /* Writeback everything. */
6735 off = iemNativeRegFlushPendingWrites(pReNative, off);
6736
6737 /*
6738     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6739 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6740 */
6741 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6742 fGstShwFlush
6743 | RT_BIT_64(kIemNativeGstReg_Pc)
6744 | RT_BIT_64(kIemNativeGstReg_EFlags));
6745 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6746
6747 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6748
6749 /*
6750 * Load the parameters.
6751 */
6752#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6753    /* Special-case the hidden VBOXSTRICTRC pointer. */
6754 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6755 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6756 if (cAddParams > 0)
6757 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6758 if (cAddParams > 1)
6759 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6760 if (cAddParams > 2)
6761 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6762 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6763
6764#else
6765 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6766 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6767 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6768 if (cAddParams > 0)
6769 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6770 if (cAddParams > 1)
6771 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6772 if (cAddParams > 2)
6773# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6774 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6775# else
6776 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6777# endif
6778#endif
6779
6780 /*
6781 * Make the call.
6782 */
6783 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6784
6785#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6786 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6787#endif
6788
6789#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6790 pReNative->Core.fDebugPcInitialized = false;
6791 Log4(("fDebugPcInitialized=false cimpl off=%#x (v2)\n", off));
6792#endif
6793
6794 /*
6795 * Check the status code.
6796 */
6797 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6798}
6799
6800
6801/**
6802 * Emits a call to a threaded worker function.
6803 */
6804DECL_HIDDEN_THROW(uint32_t)
6805iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6806{
6807 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
6808 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6809
6810 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6811 off = iemNativeRegFlushPendingWrites(pReNative, off);
6812
6813 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6814 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6815
6816#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6817 /* The threaded function may throw / long jmp, so set current instruction
6818 number if we're counting. */
6819 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6820#endif
6821
6822 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6823
6824#ifdef RT_ARCH_AMD64
6825 /* Load the parameters and emit the call. */
6826# ifdef RT_OS_WINDOWS
6827# ifndef VBOXSTRICTRC_STRICT_ENABLED
6828 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6829 if (cParams > 0)
6830 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6831 if (cParams > 1)
6832 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6833 if (cParams > 2)
6834 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6835# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6836 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6837 if (cParams > 0)
6838 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6839 if (cParams > 1)
6840 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6841 if (cParams > 2)
6842 {
6843 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6844 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6845 }
6846 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6847# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6848# else
6849 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6850 if (cParams > 0)
6851 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6852 if (cParams > 1)
6853 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6854 if (cParams > 2)
6855 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6856# endif
6857
6858 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6859
6860# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6861 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6862# endif
6863
6864#elif RT_ARCH_ARM64
6865 /*
6866 * ARM64:
6867 */
6868 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6869 if (cParams > 0)
6870 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6871 if (cParams > 1)
6872 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6873 if (cParams > 2)
6874 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6875
6876 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6877
6878#else
6879# error "port me"
6880#endif
6881
6882#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6883 pReNative->Core.fDebugPcInitialized = false;
6884 Log4(("fDebugPcInitialized=false todo off=%#x (v2)\n", off));
6885#endif
6886
6887 /*
6888 * Check the status code.
6889 */
6890 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6891
6892 return off;
6893}
6894
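/*
 * Illustrative sketch only (not emitted by the recompiler): the code generated by
 * iemNativeEmitThreadedCall above behaves roughly like the C call below, assuming
 * the usual threaded function shape of pVCpu plus up to three uint64_t parameters
 * returning a strict status code.  Fragment meant to be read in the context of the
 * emitter above; it is not compiled.
 */
#if 0
    VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
                                                                                      pCallEntry->auParams[0],
                                                                                      pCallEntry->auParams[1],
                                                                                      pCallEntry->auParams[2]);
    /* ...followed by the status code handling done by iemNativeEmitCheckCallRetAndPassUp(). */
#endif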
6895
6896/**
6897 * The default liveness function, matching iemNativeEmitThreadedCall.
6898 */
6899IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_ThreadedCall)
6900{
6901 IEM_LIVENESS_RAW_INIT_WITH_CALL(pOutgoing, pIncoming);
6902 RT_NOREF(pCallEntry);
6903}
6904
6905#ifdef VBOX_WITH_STATISTICS
6906
6907/**
6908 * Emits code to update the threaded call statistics.
6909 */
6910DECL_INLINE_THROW(uint32_t)
6911iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6912{
6913 /*
6914 * Update threaded function stats.
6915 */
6916 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6917 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6918# if defined(RT_ARCH_ARM64)
6919 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6920 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6921 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6922 iemNativeRegFreeTmp(pReNative, idxTmp1);
6923 iemNativeRegFreeTmp(pReNative, idxTmp2);
6924# else
6925 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6926# endif
6927 return off;
6928}
6929
6930
6931/**
6932 * Emits code to update the TB exit reason statistics.
6933 */
6934DECL_INLINE_THROW(uint32_t)
6935iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6936{
6937 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6938 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6939 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6940 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6941 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6942
6943 return off;
6944}
6945
6946#endif /* VBOX_WITH_STATISTICS */
6947
6948/**
6949 * Worker for iemNativeEmitViaLookupDoOne and iemNativeRecompileAttachExecMemChunkCtx.
6950 */
6951static uint32_t
6952iemNativeEmitCoreViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak, uintptr_t pfnHelper)
6953{
6954 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6955 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6956
6957 /* Jump to ReturnBreak if the return register is NULL. */
6958 off = iemNativeEmitTestIfGprIsZeroAndJmpToFixed(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6959 true /*f64Bit*/, offReturnBreak);
6960
6961 /* Okay, continue executing the next TB. */
6962 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6963 return off;
6964}
6965
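/*
 * Conceptual sketch only: what the sequence emitted by iemNativeEmitCoreViaLookupDoOne
 * above amounts to.  The helper prototype is an assumption for illustration purposes;
 * the final jump is emitted as a register-indirect branch (a tail-call into the next
 * TB's native code), not as an actual call.  Fragment, not compiled.
 */
#if 0
    uintptr_t const pfnNextTbCode = ((uintptr_t (*)(PVMCPUCC))pfnHelper)(pVCpu);
    if (!pfnNextTbCode)
        return VINF_IEM_REEXEC_BREAK;       /* reached via the jump to offReturnBreak */
    ((void (*)(void))pfnNextTbCode)();      /* emitted as a jump via IEMNATIVE_CALL_RET_GREG */
#endif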
6966
6967/**
6968 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6969 */
6970static uint32_t iemNativeEmitCoreReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6971{
6972 /* set the return status */
6973 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6974}
6975
6976
6977/**
6978 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6979 */
6980static uint32_t iemNativeEmitCoreReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6981{
6982 /* set the return status */
6983 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6984}
6985
6986
6987/**
6988 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6989 */
6990static uint32_t iemNativeEmitCoreReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6991{
6992 /* set the return status */
6993 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6994}
6995
6996
6997/**
6998 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6999 */
7000static uint32_t iemNativeEmitCoreRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7001{
7002 /*
7003 * Generate the rc + rcPassUp fiddling code.
7004 */
7005 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
7006#ifdef RT_ARCH_AMD64
7007# ifdef RT_OS_WINDOWS
7008# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7009 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
7010# endif
7011 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7012 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
7013# else
7014 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7015 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7016# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7017 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7018# endif
7019# endif
7020# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7021 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7022# endif
7023
7024#else
7025 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7026 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7027 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7028#endif
7029
7030 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7031 return off;
7032}
7033
7034
7035/**
7036 * Emits a standard epilog.
7037 */
7038static uint32_t iemNativeEmitCoreEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7039{
7040 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
7041
7042 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
7043 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7044
7045 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
7046 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
7047
7048 /*
7049 * Restore registers and return.
7050 */
7051#ifdef RT_ARCH_AMD64
7052 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7053
7054 /* Reposition rsp at the r15 restore point. */
7055 pbCodeBuf[off++] = X86_OP_REX_W;
7056 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7057 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7058 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7059
7060 /* Pop non-volatile registers and return */
7061 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7062 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7063 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7064 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7065 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7066 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7067 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7068 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7069# ifdef RT_OS_WINDOWS
7070 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7071 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7072# endif
7073 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7074 pbCodeBuf[off++] = 0xc9; /* leave */
7075 pbCodeBuf[off++] = 0xc3; /* ret */
7076 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7077
7078#elif RT_ARCH_ARM64
7079 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7080
7081 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
7082 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7083 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7084 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7085 IEMNATIVE_FRAME_VAR_SIZE / 8);
7086 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7087 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7088 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7089 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7090 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7091 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7092 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7093 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7094 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7095 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7096 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7097 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7098
7099 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7100 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7101 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7102 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7103
7104 /* retab / ret */
7105# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7106 if (1)
7107 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7108 else
7109# endif
7110 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7111
7112#else
7113# error "port me"
7114#endif
7115 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7116
7117 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
7118 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
7119
7120 return off;
7121}
7122
7123
7124
7125/*********************************************************************************************************************************
7126* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7127*********************************************************************************************************************************/
7128
7129/**
7130 * Internal worker that allocates a variable with kind set to
7131 * kIemNativeVarKind_Invalid and no current stack allocation.
7132 *
7133 * The kind will either be set by the caller or later when the variable is first
7134 * assigned a value.
7135 *
7136 * @returns Unpacked index.
7137 * @internal
7138 */
7139static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7140{
7141 Assert(cbType > 0 && cbType <= 64);
7142 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7143 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7144 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7145 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7146 pReNative->Core.aVars[idxVar].cbVar = cbType;
7147 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7148 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7149 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7150 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7151 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7152 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7153 pReNative->Core.aVars[idxVar].u.uValue = 0;
7154#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7155 pReNative->Core.aVars[idxVar].fSimdReg = false;
7156#endif
7157 return idxVar;
7158}
7159
7160
7161/**
7162 * Internal worker that allocates an argument variable w/o setting enmKind.
7163 *
7164 * @returns Unpacked index.
7165 * @internal
7166 */
7167static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7168{
7169 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7170 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7171 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7172
7173 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7174 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7175 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7176 return idxVar;
7177}
7178
7179
7180/**
7181 * Gets the stack slot for a stack variable, allocating one if necessary.
7182 *
7183 * Calling this function implies that the stack slot will contain a valid
7184 * variable value. The caller deals with any register currently assigned to the
7185 * variable, typically by spilling it into the stack slot.
7186 *
7187 * @returns The stack slot number.
7188 * @param pReNative The recompiler state.
7189 * @param idxVar The variable.
7190 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7191 */
7192DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7193{
7194 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7195 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7196 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7197
7198 /* Already got a slot? */
7199 uint8_t const idxStackSlot = pVar->idxStackSlot;
7200 if (idxStackSlot != UINT8_MAX)
7201 {
7202 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7203 return idxStackSlot;
7204 }
7205
7206 /*
7207 * A single slot is easy to allocate.
7208 * Allocate them from the top end, closest to BP, to reduce the displacement.
7209 */
7210 if (pVar->cbVar <= sizeof(uint64_t))
7211 {
7212 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7213 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7214 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7215 pVar->idxStackSlot = (uint8_t)iSlot;
7216 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7217 return (uint8_t)iSlot;
7218 }
7219
7220 /*
7221 * We need more than one stack slot.
7222 *
7223 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7224 */
7225 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7226 Assert(pVar->cbVar <= 64);
7227 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7228 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7229 uint32_t bmStack = pReNative->Core.bmStack;
7230 while (bmStack != UINT32_MAX)
7231 {
7232 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7233 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7234 iSlot = (iSlot - 1) & ~fBitAlignMask;
7235 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7236 {
7237 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7238 pVar->idxStackSlot = (uint8_t)iSlot;
7239 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7240 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7241 return (uint8_t)iSlot;
7242 }
7243
7244 bmStack |= (fBitAllocMask << iSlot);
7245 }
7246 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7247}
7248
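/*
 * Illustrative sketch only: how the alignment and allocation masks used for
 * multi-slot variables in iemNativeVarGetStackSlot above work out for the
 * supported sizes (8 byte slots).  The helper name below is made up for the
 * illustration and is not part of the recompiler.
 */
#if 0
static void iemNativeVarStackSlotMaskIllustration(void)
{
    static uint8_t const s_acbVar[] = { 16, 32, 64 };
    for (unsigned i = 0; i < RT_ELEMENTS(s_acbVar); i++)
    {
        uint8_t const  cbVar         = s_acbVar[i];
        uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(cbVar) - 4) - 1; /* 16 -> 1; 32 -> 3; 64 -> 7 */
        uint32_t const fBitAllocMask = RT_BIT_32((cbVar + 7) >> 3) - 1;            /* 16 -> 0x3; 32 -> 0xf; 64 -> 0xff */
        RTPrintf("cbVar=%u: fBitAlignMask=%#x fBitAllocMask=%#x (%u slots)\n",
                 (unsigned)cbVar, fBitAlignMask, fBitAllocMask, (unsigned)(cbVar + 7) / 8);
    }
}
#endif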
7249
7250/**
7251 * Changes the variable to a stack variable.
7252 *
7253 * Currently this is only possible to do the first time the variable is used;
7254 * switching later can be implemented but hasn't been done.
7255 *
7256 * @param pReNative The recompiler state.
7257 * @param idxVar The variable.
7258 * @throws VERR_IEM_VAR_IPE_2
7259 */
7260DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7261{
7262 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7263 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7264 if (pVar->enmKind != kIemNativeVarKind_Stack)
7265 {
7266 /* We could in theory transition from immediate to stack as well, but it
7267 would involve the caller doing work storing the value on the stack. So,
7268 till that's required we only allow transition from invalid. */
7269 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7270 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7271 pVar->enmKind = kIemNativeVarKind_Stack;
7272
7273 /* Note! We don't allocate a stack slot here, that's only done when a
7274 slot is actually needed to hold a variable value. */
7275 }
7276}
7277
7278
7279/**
7280 * Sets the variable to a constant (immediate) value.
7281 *
7282 * This does not require stack storage as we know the value and can always
7283 * reload it, unless of course it's referenced.
7284 *
7285 * @param pReNative The recompiler state.
7286 * @param idxVar The variable.
7287 * @param uValue The immediate value.
7288 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7289 */
7290DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7291{
7292 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7293 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7294 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7295 {
7296 /* Only simple transitions for now. */
7297 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7298 pVar->enmKind = kIemNativeVarKind_Immediate;
7299 }
7300 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7301
7302 pVar->u.uValue = uValue;
7303 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7304 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7305 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7306}
7307
7308
7309/**
7310 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7311 *
7312 * This does not require stack storage as we know the value and can always
7313 * reload it. Loading is postponed till needed.
7314 *
7315 * @param pReNative The recompiler state.
7316 * @param idxVar The variable. Unpacked.
7317 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7318 *
7319 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7320 * @internal
7321 */
7322static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7323{
7324 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7325 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7326
7327 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7328 {
7329 /* Only simple transitions for now. */
7330 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7331 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7332 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7333 }
7334 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7335
7336 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7337
7338 /* Update the other variable, ensure it's a stack variable. */
7339 /** @todo handle variables with const values... that'll go boom now. */
7340 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7341 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7342}
7343
7344
7345/**
7346 * Sets the variable to a reference (pointer) to a guest register reference.
7347 *
7348 * This does not require stack storage as we know the value and can always
7349 * reload it. Loading is postponed till needed.
7350 *
7351 * @param pReNative The recompiler state.
7352 * @param idxVar The variable.
7353 * @param enmRegClass The class guest registers to reference.
7354 * @param idxReg The register within @a enmRegClass to reference.
7355 *
7356 * @throws VERR_IEM_VAR_IPE_2
7357 */
7358DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7359 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7360{
7361 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7362 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7363
7364 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7365 {
7366 /* Only simple transitions for now. */
7367 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7368 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7369 }
7370 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7371
7372 pVar->u.GstRegRef.enmClass = enmRegClass;
7373 pVar->u.GstRegRef.idx = idxReg;
7374}
7375
7376
7377DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7378{
7379 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7380}
7381
7382
7383DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7384{
7385 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7386
7387 /* Since we're using a generic uint64_t value type, we must truncate it if
7388 the variable is smaller, otherwise we may end up with a too large value
7389 when scaling up an imm8 w/ sign-extension.
7390
7391 This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7392 in the BIOS, bx=1) when running on arm, because clang expects 16-bit
7393 register parameters to have bits 16 and up set to zero. Instead of
7394 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
7395 CF value in the result. */
7396 switch (cbType)
7397 {
7398 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7399 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7400 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7401 }
7402 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7403 return idxVar;
7404}
7405
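/*
 * Usage sketch only (hypothetical values): the truncation above keeps a
 * sign-extended immediate from leaking set bits above the variable size,
 * matching the 16-bit example in the comment.  Fragment, not compiled.
 */
#if 0
    uint64_t const uSignExtended = UINT64_C(0xffffffffffffffff);  /* e.g. an imm8 of 0xff sign-extended all the way up */
    uint8_t const  idxVarImm16   = iemNativeArgAllocConst(pReNative, 1 /*iArgNo*/, sizeof(uint16_t), uSignExtended);
    /* The constant is stored as 0xffff, so the 16-bit argument register later gets bits 16 and up cleared as expected. */
#endif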
7406
7407DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7408{
7409 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7410 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7411 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7412 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7413 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7414 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7415
7416 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7417 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7418 return idxArgVar;
7419}
7420
7421
7422DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7423{
7424 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7425 /* Don't set to stack now, leave that to the first use as for instance
7426 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7427 return idxVar;
7428}
7429
7430
7431DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7432{
7433 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7434
7435 /* Since we're using a generic uint64_t value type, we must truncate it if
7436 the variable is smaller, otherwise we may end up with a too large value
7437 when scaling up an imm8 w/ sign-extension. */
7438 switch (cbType)
7439 {
7440 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7441 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7442 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7443 }
7444 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7445 return idxVar;
7446}
7447
7448
7449DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
7450 uint8_t cbType, uint8_t idxVarOther)
7451{
7452 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7453 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7454
7455 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquireInited(pReNative, idxVarOther, poff);
7456 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7457
7458/** @todo combine MOV and AND using MOVZX/similar. */
7459 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7460
7461 /* Truncate the value to this variables size. */
7462 switch (cbType)
7463 {
7464 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7465 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7466 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7467 }
7468
7469 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7470 iemNativeVarRegisterRelease(pReNative, idxVar);
7471 return idxVar;
7472}
7473
7474
7475/**
7476 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7477 * fixed till we call iemNativeVarRegisterRelease.
7478 *
7479 * @returns The host register number.
7480 * @param pReNative The recompiler state.
7481 * @param idxVar The variable.
7482 * @param poff Pointer to the instruction buffer offset.
7483 * In case a register needs to be freed up or the value
7484 * loaded off the stack.
7485 * @param idxRegPref Preferred register number or UINT8_MAX.
7486 *
7487 * @tparam a_fInitialized Set if the variable must already have been
7488 * initialized. Will throw VERR_IEM_VAR_NOT_INITIALIZED
7489 * if this is not the case.
7490 * @tparam a_fWithRegPref Set if @a idxRegPref is valid.
7491 *
7492 * @note Must not modify the host status flags!
7493 */
7494template<bool const a_fInitialized, bool const a_fWithRegPref>
7495DECL_FORCE_INLINE_THROW(uint8_t)
7496iemNativeVarRegisterAcquireInt(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff, uint8_t idxRegPref)
7497{
7498 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7499 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7500 Assert(pVar->cbVar <= 8);
7501 Assert(!pVar->fRegAcquired);
7502 Assert(!a_fWithRegPref || idxRegPref < RT_ELEMENTS(pReNative->Core.aHstRegs));
7503
7504 /** @todo inline this bit? */
7505 uint8_t idxReg = pVar->idxReg;
7506 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7507 {
7508 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7509 && pVar->enmKind < kIemNativeVarKind_End);
7510 pVar->fRegAcquired = true;
7511 return idxReg;
7512 }
7513
7514 /*
7515 * If the kind of variable has not yet been set, default to 'stack'.
7516 */
7517 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7518 && pVar->enmKind < kIemNativeVarKind_End);
7519 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7520 iemNativeVarSetKindToStack(pReNative, idxVar);
7521
7522 /*
7523 * We have to allocate a register for the variable, even if it's a stack one,
7524 * as we don't know if there are modifications being made to it before it's
7525 * finalized (todo: analyze and insert hints about that?).
7526 *
7527 * If we can, we try to get the correct register for argument variables. This
7528 * assumes that most argument variables are fetched as close as possible
7529 * to the actual call, so that there aren't any interfering hidden calls
7530 * (memory accesses, etc.) in between.
7531 *
7532 * If we cannot, or it's a local variable, we make sure no argument registers
7533 * that will be used by this MC block are allocated here, and we always
7534 * prefer non-volatile registers to avoid needing to spill stuff for internal
7535 * calls.
7536 */
7537 /** @todo Detect too early argument value fetches and warn about hidden
7538 * calls causing less optimal code to be generated in the python script. */
7539
7540 uint8_t const uArgNo = pVar->uArgNo;
7541 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7542 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7543 {
7544 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7545
7546#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7547 /* Writeback any dirty shadow registers we are about to unshadow. */
7548 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7549#endif
7550
7551 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7552 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7553 }
7554 else if ( !a_fWithRegPref
7555 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7556 {
7557 /** @todo there must be a better way for this and boot cArgsX? */
7558 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7559 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7560 & ~pReNative->Core.bmHstRegsWithGstShadow
7561 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7562 & fNotArgsMask;
7563 if (fRegs)
7564 {
7565 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7566 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7567 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7568 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7569 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7570 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7571 }
7572 else
7573 {
7574 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7575 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7576 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7577 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7578 }
7579 }
7580 else
7581 {
7582 idxReg = idxRegPref;
7583 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7584 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7585 }
7586 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7587 pVar->idxReg = idxReg;
7588
7589#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7590 pVar->fSimdReg = false;
7591#endif
7592
7593 /*
7594 * Load it off the stack if we've got a stack slot.
7595 */
7596 uint8_t const idxStackSlot = pVar->idxStackSlot;
7597 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7598 {
7599 Assert(a_fInitialized);
7600 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7601 switch (pVar->cbVar)
7602 {
7603 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7604 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7605 case 3: AssertFailed(); RT_FALL_THRU();
7606 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7607 default: AssertFailed(); RT_FALL_THRU();
7608 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7609 }
7610 }
7611 else
7612 {
7613 Assert(idxStackSlot == UINT8_MAX);
7614 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7615 AssertStmt(!a_fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7616 else
7617 {
7618 /*
7619 * Convert from immediate to stack/register. This is currently only
7620 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7621 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7622 */
7623 AssertStmt(a_fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7624 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7625 idxVar, idxReg, pVar->u.uValue));
7626 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7627 pVar->enmKind = kIemNativeVarKind_Stack;
7628 }
7629 }
7630
7631 pVar->fRegAcquired = true;
7632 return idxReg;
7633}
7634
7635
7636/** See iemNativeVarRegisterAcquireInt for details. */
7637DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
7638{
7639 /* very likely */
7640 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 0]);
7641 return iemNativeVarRegisterAcquireInt<false, false>(pReNative, idxVar, poff, UINT8_MAX);
7642}
7643
7644
7645/** See iemNativeVarRegisterAcquireInt for details. */
7646DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquireInited(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
7647{
7648 /* even more likely */
7649 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 2]);
7650 return iemNativeVarRegisterAcquireInt<true, false>(pReNative, idxVar, poff, UINT8_MAX);
7651}
7652
7653
7654/** See iemNativeVarRegisterAcquireInt for details. */
7655DECL_HIDDEN_THROW(uint8_t)
7656iemNativeVarRegisterAcquireWithPref(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff, uint8_t idxRegPref)
7657{
7658 /* unused */
7659 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 4]);
7660 return iemNativeVarRegisterAcquireInt<false, true>(pReNative, idxVar, poff, idxRegPref);
7661}
7662
7663
7664/** See iemNativeVarRegisterAcquireInt for details. */
7665DECL_HIDDEN_THROW(uint8_t)
7666iemNativeVarRegisterAcquireInitedWithPref(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff, uint8_t idxRegPref)
7667{
7668 /* very very likely */
7669 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 6]);
7670 return iemNativeVarRegisterAcquireInt<true, true>(pReNative, idxVar, poff, idxRegPref);
7671}
7672
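/*
 * Usage sketch only: the typical acquire / emit / release pattern for the
 * helpers above.  idxVarMyLocal is a hypothetical (already initialized)
 * variable index and the emitted instruction is just a placeholder.
 * Fragment, not compiled.
 */
#if 0
    uint8_t const idxVarRegMyLocal = iemNativeVarRegisterAcquireInited(pReNative, idxVarMyLocal, &off);
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxVarRegMyLocal, UINT64_C(0x42)); /* placeholder use of the host register */
    iemNativeVarRegisterRelease(pReNative, idxVarMyLocal);
#endif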
7673
7674#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7675/**
7676 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7677 * fixed till we call iemNativeVarRegisterRelease.
7678 *
7679 * @returns The host register number.
7680 * @param pReNative The recompiler state.
7681 * @param idxVar The variable.
7682 * @param poff Pointer to the instruction buffer offset.
7683 * In case a register needs to be freed up or the value
7684 * loaded off the stack.
7685 * @param fInitialized Set if the variable must already have been initialized.
7686 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7687 * the case.
7688 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7689 */
7690DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7691 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7692{
7693 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7694 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7695 Assert( pVar->cbVar == sizeof(RTUINT128U)
7696 || pVar->cbVar == sizeof(RTUINT256U));
7697 Assert(!pVar->fRegAcquired);
7698
7699 uint8_t idxReg = pVar->idxReg;
7700 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7701 {
7702 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7703 && pVar->enmKind < kIemNativeVarKind_End);
7704 pVar->fRegAcquired = true;
7705 return idxReg;
7706 }
7707
7708 /*
7709 * If the kind of variable has not yet been set, default to 'stack'.
7710 */
7711 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7712 && pVar->enmKind < kIemNativeVarKind_End);
7713 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7714 iemNativeVarSetKindToStack(pReNative, idxVar);
7715
7716 /*
7717 * We have to allocate a register for the variable, even if it's a stack one,
7718 * as we don't know if there are modifications being made to it before it's
7719 * finalized (todo: analyze and insert hints about that?).
7720 *
7721 * If we can, we try to get the correct register for argument variables. This
7722 * assumes that most argument variables are fetched as close as possible
7723 * to the actual call, so that there aren't any interfering hidden calls
7724 * (memory accesses, etc.) in between.
7725 *
7726 * If we cannot, or it's a local variable, we make sure no argument registers
7727 * that will be used by this MC block are allocated here, and we always
7728 * prefer non-volatile registers to avoid needing to spill stuff for internal
7729 * calls.
7730 */
7731 /** @todo Detect too early argument value fetches and warn about hidden
7732 * calls causing less optimal code to be generated in the python script. */
7733
7734 uint8_t const uArgNo = pVar->uArgNo;
7735 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7736
7737 /* SIMD is a bit simpler for now because there is no support for arguments. */
7738 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7739 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7740 {
7741 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7742 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7743 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7744 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7745 & fNotArgsMask;
7746 if (fRegs)
7747 {
7748 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7749 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7750 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7751 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7752 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7753 }
7754 else
7755 {
7756 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7757 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7758 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7759 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7760 }
7761 }
7762 else
7763 {
7764 idxReg = idxRegPref;
7765 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7766 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7767 }
7768 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7769
7770 pVar->fSimdReg = true;
7771 pVar->idxReg = idxReg;
7772
7773 /*
7774 * Load it off the stack if we've got a stack slot.
7775 */
7776 uint8_t const idxStackSlot = pVar->idxStackSlot;
7777 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7778 {
7779 Assert(fInitialized);
7780 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7781 switch (pVar->cbVar)
7782 {
7783 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7784 default: AssertFailed(); RT_FALL_THRU();
7785 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7786 }
7787 }
7788 else
7789 {
7790 Assert(idxStackSlot == UINT8_MAX);
7791 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7792 }
7793 pVar->fRegAcquired = true;
7794 return idxReg;
7795}
7796#endif
7797
7798
7799/**
7800 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7801 * guest register.
7802 *
7803 * This function makes sure there is a register for it and sets it to be the
7804 * current shadow copy of @a enmGstReg.
7805 *
7806 * @returns The host register number.
7807 * @param pReNative The recompiler state.
7808 * @param idxVar The variable.
7809 * @param enmGstReg The guest register this variable will be written to
7810 * after this call.
7811 * @param poff Pointer to the instruction buffer offset.
7812 * In case a register needs to be freed up or if the
7813 * variable content needs to be loaded off the stack.
7814 *
7815 * @note We DO NOT expect @a idxVar to be an argument variable, because
7816 * this function is only used in the commit stage of an
7817 * instruction.
7818 */
7819DECL_HIDDEN_THROW(uint8_t)
7820iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7821{
7822 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7823 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7824 Assert(!pVar->fRegAcquired);
7825 AssertMsgStmt( pVar->cbVar <= 8
7826 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7827 || pVar->enmKind == kIemNativeVarKind_Stack),
7828 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7829 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7830 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7831
7832 /*
7833 * This shouldn't ever be used for arguments, unless it's in a weird else
7834 * branch that doesn't do any calling and even then it's questionable.
7835 *
7836 * However, in case someone writes crazy wrong MC code and does register
7837 * updates before making calls, just use the regular register allocator to
7838 * ensure we get a register suitable for the intended argument number.
7839 */
7840 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7841
7842 /*
7843 * If there is already a register for the variable, we transfer/set the
7844 * guest shadow copy assignment to it.
7845 */
7846 uint8_t idxReg = pVar->idxReg;
7847 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7848 {
7849#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7850 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7851 {
7852# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7853 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7854 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7855# endif
7856 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7857 }
7858#endif
7859
7860 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7861 {
7862 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7863 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7864 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7865 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7866 }
7867 else
7868 {
7869 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7870 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7871 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7872 }
7873 /** @todo figure this one out. We need some way of making sure the register isn't
7874 * modified after this point, just in case we start writing crappy MC code. */
7875 pVar->enmGstReg = enmGstReg;
7876 pVar->fRegAcquired = true;
7877 return idxReg;
7878 }
7879 Assert(pVar->uArgNo == UINT8_MAX);
7880
7881 /*
7882 * Because this is supposed to be the commit stage, we just tag along with the
7883 * temporary register allocator and upgrade the register to a variable register.
7884 */
7885 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7886 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7887 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7888 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7889 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7890 pVar->idxReg = idxReg;
7891
7892 /*
7893 * Now we need to load the register value.
7894 */
7895 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7896 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7897 else
7898 {
7899 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7900 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7901 switch (pVar->cbVar)
7902 {
7903 case sizeof(uint64_t):
7904 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7905 break;
7906 case sizeof(uint32_t):
7907 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7908 break;
7909 case sizeof(uint16_t):
7910 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7911 break;
7912 case sizeof(uint8_t):
7913 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7914 break;
7915 default:
7916 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7917 }
7918 }
7919
7920 pVar->fRegAcquired = true;
7921 return idxReg;
7922}
7923
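/*
 * Usage sketch only: committing a variable as the new value of a guest GPR via
 * the helper above.  idxVarValue is a hypothetical variable index and rax is
 * just an example destination.  Fragment, not compiled.
 */
#if 0
    uint8_t const idxRegCommit = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarValue,
                                                                        (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
                                                                        &off);
    /* The host register now holds the variable value and is the current shadow of rax; the actual
       CPUMCTX write-back is then emitted by the caller or handled by the dirty-shadow flushing machinery. */
    iemNativeVarRegisterRelease(pReNative, idxVarValue);
#endif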
7924
7925/**
7926 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7927 *
7928 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7929 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7930 * requirement of flushing anything in volatile host registers when making a
7931 * call.
7932 *
7933 * @returns New @a off value.
7934 * @param pReNative The recompiler state.
7935 * @param off The code buffer position.
7936 * @param fHstRegsNotToSave Set of registers not to save & restore.
7937 */
7938DECL_HIDDEN_THROW(uint32_t)
7939iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7940{
7941 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
7942 if (fHstRegs)
7943 {
7944 do
7945 {
7946 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7947 fHstRegs &= ~RT_BIT_32(idxHstReg);
7948
7949 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7950 {
7951 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7952 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7953 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7954 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7955 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7956 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7957 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7958 {
7959 case kIemNativeVarKind_Stack:
7960 {
7961 /* Temporarily spill the variable register. */
7962 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7963 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7964 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7965 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7966 continue;
7967 }
7968
7969 case kIemNativeVarKind_Immediate:
7970 case kIemNativeVarKind_VarRef:
7971 case kIemNativeVarKind_GstRegRef:
7972 /* It is weird to have any of these loaded at this point. */
7973 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7974 continue;
7975
7976 case kIemNativeVarKind_End:
7977 case kIemNativeVarKind_Invalid:
7978 break;
7979 }
7980 AssertFailed();
7981 }
7982 else
7983 {
7984 /*
7985 * Allocate a temporary stack slot and spill the register to it.
7986 */
7987 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7988 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7989 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7990 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7991 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7992 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7993 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7994 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7995 }
7996 } while (fHstRegs);
7997 }
7998#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7999
8000 /*
8001 * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot
8002 * allocated, which would be more difficult due to them spanning multiple stack slots and having
8003 * different sizes (besides, we only have a limited amount of slots at the moment).
8004 *
8005 * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted by
8006 * the callee. This asserts that the registers were written back earlier and are not in the dirty state.
8007 */
8008 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
8009
8010 fHstRegs = pReNative->Core.bmHstSimdRegs & (IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
8011 if (fHstRegs)
8012 {
8013 do
8014 {
8015 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8016 fHstRegs &= ~RT_BIT_32(idxHstReg);
8017
8018 /* Fixed reserved and temporary registers don't need saving. */
8019 /*Assert( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedReserved
8020 && pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedTmp); included below */
8021 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8022
8023 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8024 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8025 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8026 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8027 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8028 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8029 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8030 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8031 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8032 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8033 {
8034 case kIemNativeVarKind_Stack:
8035 {
8036 /* Temporarily spill the variable register. */
8037 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8038 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8039 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8040 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8041 if (cbVar == sizeof(RTUINT128U))
8042 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8043 else
8044 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8045 continue;
8046 }
8047
8048 case kIemNativeVarKind_Immediate:
8049 case kIemNativeVarKind_VarRef:
8050 case kIemNativeVarKind_GstRegRef:
8051 /* It is weird to have any of these loaded at this point. */
8052 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8053 continue;
8054
8055 case kIemNativeVarKind_End:
8056 case kIemNativeVarKind_Invalid:
8057 break;
8058 }
8059 AssertFailed();
8060 } while (fHstRegs);
8061 }
8062#endif
8063 return off;
8064}
8065
8066
8067/**
8068 * Emit code to restore volatile registers after a call to a helper.
8069 *
8070 * @returns New @a off value.
8071 * @param pReNative The recompiler state.
8072 * @param off The code buffer position.
8073 * @param fHstRegsNotToSave Set of registers not to save & restore.
8074 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8075 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8076 */
8077DECL_HIDDEN_THROW(uint32_t)
8078iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8079{
8080 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
8081 if (fHstRegs)
8082 {
8083 do
8084 {
8085 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8086 fHstRegs &= ~RT_BIT_32(idxHstReg);
8087
8088 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8089 {
8090 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8091 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8092 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8093 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8094 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8095 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8096 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8097 {
8098 case kIemNativeVarKind_Stack:
8099 {
8100 /* Unspill the variable register. */
8101 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8102 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8103 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8104 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8105 continue;
8106 }
8107
8108 case kIemNativeVarKind_Immediate:
8109 case kIemNativeVarKind_VarRef:
8110 case kIemNativeVarKind_GstRegRef:
8111 /* It is weird to have any of these loaded at this point. */
8112 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8113 continue;
8114
8115 case kIemNativeVarKind_End:
8116 case kIemNativeVarKind_Invalid:
8117 break;
8118 }
8119 AssertFailed();
8120 }
8121 else
8122 {
8123 /*
8124 * Restore from temporary stack slot.
8125 */
8126 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8127 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8128 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8129 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8130
8131 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8132 }
8133 } while (fHstRegs);
8134 }
8135#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8136 fHstRegs = pReNative->Core.bmHstSimdRegs & (IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
8137 if (fHstRegs)
8138 {
8139 do
8140 {
8141 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8142 fHstRegs &= ~RT_BIT_32(idxHstReg);
8143
8144 /*Assert( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedTmp
8145 && pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedReserved); - included below. */
8146 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8147
8148 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8149 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8150 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8151 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8152 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8153 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8154 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8155 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8156 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8157 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8158 {
8159 case kIemNativeVarKind_Stack:
8160 {
8161 /* Unspill the variable register. */
8162 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8163 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8164 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8165 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8166
8167 if (cbVar == sizeof(RTUINT128U))
8168 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8169 else
8170 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8171 continue;
8172 }
8173
8174 case kIemNativeVarKind_Immediate:
8175 case kIemNativeVarKind_VarRef:
8176 case kIemNativeVarKind_GstRegRef:
8177 /* It is weird to have any of these loaded at this point. */
8178 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8179 continue;
8180
8181 case kIemNativeVarKind_End:
8182 case kIemNativeVarKind_Invalid:
8183 break;
8184 }
8185 AssertFailed();
8186 } while (fHstRegs);
8187 }
8188#endif
8189 return off;
8190}
8191
8192
8193/**
8194 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8195 *
8196 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8197 *
8198 * ASSUMES that @a idxVar is valid and unpacked.
8199 */
8200DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8201{
8202 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8203 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8204 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8205 {
8206 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8207 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8208 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
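        /* E.g. cbVar == 16 (RTUINT128U) gives cSlots == 2 and fAllocMask == 0x3; the mask is
           shifted to idxStackSlot when clearing the bmStack allocation bitmap below. */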
8209 Assert(cSlots > 0);
8210 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8211 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8212 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8213 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8214 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8215 }
8216 else
8217 Assert(idxStackSlot == UINT8_MAX);
8218}
8219
8220
8221/**
8222 * Worker that frees a single variable.
8223 *
8224 * ASSUMES that @a idxVar is valid and unpacked.
8225 */
8226DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8227{
8228 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8229 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8230 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8231
8232 /* Free the host register first if any assigned. */
8233 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8234#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8235 if ( idxHstReg != UINT8_MAX
8236 && pReNative->Core.aVars[idxVar].fSimdReg)
8237 {
8238 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8239 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8240 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8241 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8242 }
8243 else
8244#endif
8245 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8246 {
8247 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8248 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8249 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8250 }
8251
8252 /* Free argument mapping. */
8253 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8254 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8255 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8256
8257 /* Free the stack slots. */
8258 iemNativeVarFreeStackSlots(pReNative, idxVar);
8259
8260 /* Free the actual variable. */
8261 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8262 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8263}
8264
8265
8266/**
8267 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8268 */
8269DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8270{
8271 while (bmVars != 0)
8272 {
8273 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8274 bmVars &= ~RT_BIT_32(idxVar);
8275
8276#if 1 /** @todo optimize by simplifying this later... */
8277 iemNativeVarFreeOneWorker(pReNative, idxVar);
8278#else
8279 /* Only need to free the host register, the rest is done as bulk updates below. */
8280 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8281 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8282 {
8283 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8284 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8285 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8286 }
8287#endif
8288 }
8289#if 0 /** @todo optimize by simplifying this later... */
8290 pReNative->Core.bmVars = 0;
8291 pReNative->Core.bmStack = 0;
8292 pReNative->Core.u64ArgVars = UINT64_MAX;
8293#endif
8294}
8295
8296
8297
8298/*********************************************************************************************************************************
8299* Emitters for IEM_MC_CALL_CIMPL_XXX *
8300*********************************************************************************************************************************/
8301
8302/**
8303 * Emits code to load a reference to the given guest register into @a idxGprDst.
8304 */
8305DECL_HIDDEN_THROW(uint32_t)
8306iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8307 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8308{
8309#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8310 /** @todo If we are ever going to allow referencing the RIP register, we need to update the guest value here. */
8311#endif
8312
8313 /*
8314 * Get the offset relative to the CPUMCTX structure.
8315 */
8316 uint32_t offCpumCtx;
8317 switch (enmClass)
8318 {
8319 case kIemNativeGstRegRef_Gpr:
8320 Assert(idxRegInClass < 16);
8321 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8322 break;
8323
8324 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
8325 Assert(idxRegInClass < 4);
8326 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8327 break;
8328
8329 case kIemNativeGstRegRef_EFlags:
8330 Assert(idxRegInClass == 0);
8331 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8332 break;
8333
8334 case kIemNativeGstRegRef_MxCsr:
8335 Assert(idxRegInClass == 0);
8336 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8337 break;
8338
8339 case kIemNativeGstRegRef_FpuReg:
8340 Assert(idxRegInClass < 8);
8341 AssertFailed(); /** @todo what kind of indexing? */
8342 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8343 break;
8344
8345 case kIemNativeGstRegRef_MReg:
8346 Assert(idxRegInClass < 8);
8347 AssertFailed(); /** @todo what kind of indexing? */
8348 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8349 break;
8350
8351 case kIemNativeGstRegRef_XReg:
8352 Assert(idxRegInClass < 16);
8353 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8354 break;
8355
8356 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8357 Assert(idxRegInClass == 0);
8358 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8359 break;
8360
8361 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8362 Assert(idxRegInClass == 0);
8363 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8364 break;
8365
8366 default:
8367 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8368 }
8369
8370 /*
8371 * Load the address (the register reference) into the destination register.
8372 */
8373#ifdef RT_ARCH_AMD64
8374 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8375
8376#elif defined(RT_ARCH_ARM64)
8377 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8378 Assert(offCpumCtx < 4096);
8379 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8380
8381#else
8382# error "Port me!"
8383#endif
8384
8385 return off;
8386}
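/* Illustrative example (not from the original source): a call like
       iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxGprDst, kIemNativeGstRegRef_Gpr, X86_GREG_xBX)
   emits a LEA (AMD64) or ADD-immediate (ARM64) that leaves the address of
   pVCpu->cpum.GstCtx.aGRegs[X86_GREG_xBX] in idxGprDst. */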
8387
8388
8389/**
8390 * Common code for CIMPL and AIMPL calls.
8391 *
8392 * These are calls that use argument variables and such. They should not be
8393 * confused with internal calls required to implement an MC operation,
8394 * like a TLB load and similar.
8395 *
8396 * Upon return all that is left to do is to load any hidden arguments and
8397 * perform the call. All argument variables are freed.
8398 *
8399 * @returns New code buffer offset; throws VBox status code on error.
8400 * @param pReNative The native recompile state.
8401 * @param off The code buffer offset.
8402 * @param cArgs The total number of arguments (includes hidden
8403 * count).
8404 * @param cHiddenArgs The number of hidden arguments. The hidden
8405 * arguments must not have any variable declared for
8406 * them, whereas all the regular arguments must
8407 * (tstIEMCheckMc ensures this).
8408 * @param fFlushPendingWrites Whether to flush pending writes (default true);
8409 * pending writes in call-volatile registers are still flushed even if false.
8410 */
8411DECL_HIDDEN_THROW(uint32_t)
8412iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8413 bool fFlushPendingWrites /*= true*/)
8414{
8415#ifdef VBOX_STRICT
8416 /*
8417 * Assert sanity.
8418 */
8419 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8420 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8421 for (unsigned i = 0; i < cHiddenArgs; i++)
8422 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8423 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8424 {
8425 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8426 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8427 }
8428 iemNativeRegAssertSanity(pReNative);
8429#endif
8430
8431 /* We don't know what the called function makes use of, so flush any pending register writes. */
8432 RT_NOREF(fFlushPendingWrites);
8433#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8434 if (fFlushPendingWrites)
8435#endif
8436 off = iemNativeRegFlushPendingWrites(pReNative, off);
8437
8438 /*
8439 * Before we do anything else, go over variables that are referenced and
8440 * make sure they are not in a register.
8441 */
8442 uint32_t bmVars = pReNative->Core.bmVars;
8443 if (bmVars)
8444 {
8445 do
8446 {
8447 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8448 bmVars &= ~RT_BIT_32(idxVar);
8449
8450 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8451 {
8452 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8453#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8454 if ( idxRegOld != UINT8_MAX
8455 && pReNative->Core.aVars[idxVar].fSimdReg)
8456 {
8457 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8458 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8459
8460 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8461 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8462 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8463 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8464 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8465 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8466 else
8467 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8468
8469 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8470 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8471
8472 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8473 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8474 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8475 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8476 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8477 }
8478 else
8479#endif
8480 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8481 {
8482 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8483 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8484 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8485 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8486 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8487
8488 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8489 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8490 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8491 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8492 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8493 }
8494 }
8495 } while (bmVars != 0);
8496#if 0 //def VBOX_STRICT
8497 iemNativeRegAssertSanity(pReNative);
8498#endif
8499 }
8500
8501 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
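    /* Note: cRegArgs is the number of arguments passed in host GPRs (g_aidxIemNativeCallRegs
       maps argument number to host register); any excess arguments can only occur when
       IEMNATIVE_FP_OFF_STACK_ARG0 is defined and are placed on the stack further down. */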
8502
8503#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8504 /*
8505 * As the very first step, make sure the host registers that will be used for
8506 * arguments don't shadow anything which needs writing back first.
8507 */
8508 for (uint32_t i = 0; i < cRegArgs; i++)
8509 {
8510 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8511
8512 /* Writeback any dirty guest shadows before using this register. */
8513 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8514 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8515 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8516 }
8517#endif
8518
8519 /*
8520 * First, go over the host registers that will be used for arguments and make
8521 * sure they either hold the desired argument or are free.
8522 */
8523 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8524 {
8525 for (uint32_t i = 0; i < cRegArgs; i++)
8526 {
8527 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8528 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8529 {
8530 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8531 {
8532 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8533 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8534 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8535 Assert(pVar->idxReg == idxArgReg);
8536 uint8_t const uArgNo = pVar->uArgNo;
8537 if (uArgNo == i)
8538 { /* perfect */ }
8539 /* The variable allocator logic should make sure this is impossible,
8540 except for when the return register is used as a parameter (ARM,
8541 but not x86). */
8542#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8543 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8544 {
8545# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8546# error "Implement this"
8547# endif
8548 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8549 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8550 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8551 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8552 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8553 }
8554#endif
8555 else
8556 {
8557 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8558
8559 if (pVar->enmKind == kIemNativeVarKind_Stack)
8560 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8561 else
8562 {
8563 /* just free it, can be reloaded if used again */
8564 pVar->idxReg = UINT8_MAX;
8565 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8566 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8567 }
8568 }
8569 }
8570 else
8571 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8572 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8573 }
8574 }
8575#if 0 //def VBOX_STRICT
8576 iemNativeRegAssertSanity(pReNative);
8577#endif
8578 }
8579
8580 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
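    /* The hidden arguments themselves carry no variables (asserted above); they are loaded
       by the caller just before emitting the call, see the function description. */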
8581
8582#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8583 /*
8584 * If there are any stack arguments, make sure they are in their place as well.
8585 *
8586 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8587 * the caller) will be loading it later and it must be free (see the first loop).
8588 */
8589 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8590 {
8591 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8592 {
8593 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8594 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8595 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8596 {
8597 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8598 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8599 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8600 pVar->idxReg = UINT8_MAX;
8601 }
8602 else
8603 {
8604 /* Use ARG0 as temp for stuff we need registers for. */
8605 switch (pVar->enmKind)
8606 {
8607 case kIemNativeVarKind_Stack:
8608 {
8609 uint8_t const idxStackSlot = pVar->idxStackSlot;
8610 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8611 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8612 iemNativeStackCalcBpDisp(idxStackSlot));
8613 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8614 continue;
8615 }
8616
8617 case kIemNativeVarKind_Immediate:
8618 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8619 continue;
8620
8621 case kIemNativeVarKind_VarRef:
8622 {
8623 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8624 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8625 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8626 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8627 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8628# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8629 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8630 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8631 if ( fSimdReg
8632 && idxRegOther != UINT8_MAX)
8633 {
8634 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8635 if (cbVar == sizeof(RTUINT128U))
8636 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8637 else
8638 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8639 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8640 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8641 }
8642 else
8643# endif
8644 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8645 {
8646 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8647 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8648 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8649 }
8650 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8651 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8652 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8653 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8654 continue;
8655 }
8656
8657 case kIemNativeVarKind_GstRegRef:
8658 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8659 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8660 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8661 continue;
8662
8663 case kIemNativeVarKind_Invalid:
8664 case kIemNativeVarKind_End:
8665 break;
8666 }
8667 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8668 }
8669 }
8670# if 0 //def VBOX_STRICT
8671 iemNativeRegAssertSanity(pReNative);
8672# endif
8673 }
8674#else
8675 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8676#endif
8677
8678 /*
8679 * Make sure the argument variables are loaded into their respective registers.
8680 *
8681 * We can optimize this by ASSUMING that any register allocations are for
8682 * registers that have already been loaded and are ready. The previous step
8683 * saw to that.
8684 */
8685 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8686 {
8687 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8688 {
8689 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8690 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8691 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8692 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8693 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8694 else
8695 {
8696 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8697 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8698 {
8699 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8700 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8701 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8702 | RT_BIT_32(idxArgReg);
8703 pVar->idxReg = idxArgReg;
8704 }
8705 else
8706 {
8707 /* Use ARG0 as temp for stuff we need registers for. */
8708 switch (pVar->enmKind)
8709 {
8710 case kIemNativeVarKind_Stack:
8711 {
8712 uint8_t const idxStackSlot = pVar->idxStackSlot;
8713 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8714 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8715 continue;
8716 }
8717
8718 case kIemNativeVarKind_Immediate:
8719 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8720 continue;
8721
8722 case kIemNativeVarKind_VarRef:
8723 {
8724 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8725 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8726 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8727 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8728 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8729 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8730#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8731 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8732 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8733 if ( fSimdReg
8734 && idxRegOther != UINT8_MAX)
8735 {
8736 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8737 if (cbVar == sizeof(RTUINT128U))
8738 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8739 else
8740 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8741 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8742 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8743 }
8744 else
8745#endif
8746 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8747 {
8748 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8749 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8750 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8751 }
8752 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8753 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8754 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8755 continue;
8756 }
8757
8758 case kIemNativeVarKind_GstRegRef:
8759 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8760 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8761 continue;
8762
8763 case kIemNativeVarKind_Invalid:
8764 case kIemNativeVarKind_End:
8765 break;
8766 }
8767 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8768 }
8769 }
8770 }
8771#if 0 //def VBOX_STRICT
8772 iemNativeRegAssertSanity(pReNative);
8773#endif
8774 }
8775#ifdef VBOX_STRICT
8776 else
8777 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8778 {
8779 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8780 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8781 }
8782#endif
8783
8784 /*
8785 * Free all argument variables (simplified).
8786 * Their lifetime always expires with the call they are for.
8787 */
8788 /** @todo Make the python script check that arguments aren't used after
8789 * IEM_MC_CALL_XXXX. */
8790 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8791 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8792 * an argument value. There is also some FPU stuff. */
8793 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8794 {
8795 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8796 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8797
8798 /* no need to free registers: */
8799 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8800 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8801 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8802 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8803 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8804 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8805
8806 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8807 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8808 iemNativeVarFreeStackSlots(pReNative, idxVar);
8809 }
8810 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8811
8812 /*
8813 * Flush volatile registers as we make the call.
8814 */
8815 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8816
8817 return off;
8818}
8819
8820
8821
8822/*********************************************************************************************************************************
8823* TLB Lookup. *
8824*********************************************************************************************************************************/
8825
8826/**
8827 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8828 */
8829DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint64_t uSegAndSizeAndAccessAndDisp)
8830{
8831 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccessAndDisp);
8832 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccessAndDisp);
8833 uint32_t const fAccess = (uint32_t)uSegAndSizeAndAccessAndDisp >> 16;
8834 uint8_t const offDisp = RT_BYTE5(uSegAndSizeAndAccessAndDisp);
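    /* Layout of uSegAndSizeAndAccessAndDisp (matching the unpacking above): byte 0 = iSegReg,
       byte 1 = cbMem, bytes 2+3 = fAccess, byte 4 = offDisp. */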
8835 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64+%#x LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, offDisp, cbMem, fAccess, uResult));
8836
8837 /* Do the lookup manually. */
8838 RTGCPTR const GCPtrFlat = (iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base) + offDisp;
8839 uint64_t const uTagNoRev = IEMTLB_CALC_TAG_NO_REV(GCPtrFlat);
8840 PCIEMTLBENTRY pTlbe = IEMTLB_TAG_TO_EVEN_ENTRY(&pVCpu->iem.s.DataTlb, uTagNoRev);
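    /* The even TLB entry is matched against the normal revision, its odd companion (+1)
       against the global revision. */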
8841 if (RT_LIKELY( pTlbe->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevision)
8842 || (pTlbe = pTlbe + 1)->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevisionGlobal)))
8843 {
8844 /*
8845 * Check TLB page table level access flags.
8846 */
8847 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8848 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8849 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8850 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8851 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8852 | IEMTLBE_F_PG_UNASSIGNED
8853 | IEMTLBE_F_PT_NO_ACCESSED
8854 | fNoWriteNoDirty | fNoUser);
8855 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8856 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8857 {
8858 /*
8859 * Return the address.
8860 */
8861 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8862 if ((uintptr_t)pbAddr == uResult)
8863 return;
8864 RT_NOREF(cbMem);
8865 AssertFailed();
8866 }
8867 else
8868 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8869 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8870 }
8871 else
8872 AssertFailed();
8873 RT_BREAKPOINT();
8874}
8875
8876/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8877
8878
8879
8880/*********************************************************************************************************************************
8881* Recompiler Core. *
8882*********************************************************************************************************************************/
8883
8884/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8885static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8886{
8887 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8888 pDis->cbCachedInstr += cbMaxRead;
8889 RT_NOREF(cbMinRead);
8890 return VERR_NO_DATA;
8891}
8892
8893
8894DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8895{
8896 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8897 {
8898#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8899 ENTRY(fLocalForcedActions),
8900 ENTRY(iem.s.rcPassUp),
8901 ENTRY(iem.s.fExec),
8902 ENTRY(iem.s.pbInstrBuf),
8903 ENTRY(iem.s.uInstrBufPc),
8904 ENTRY(iem.s.GCPhysInstrBuf),
8905 ENTRY(iem.s.cbInstrBufTotal),
8906 ENTRY(iem.s.idxTbCurInstr),
8907 ENTRY(iem.s.fSkippingEFlags),
8908#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
8909 ENTRY(iem.s.uPcUpdatingDebug),
8910#endif
8911#ifdef VBOX_WITH_STATISTICS
8912 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8913 ENTRY(iem.s.StatNativeTlbHitsForStore),
8914 ENTRY(iem.s.StatNativeTlbHitsForStack),
8915 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8916 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8917 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8918 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8919 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8920#endif
8921 ENTRY(iem.s.DataTlb.uTlbRevision),
8922 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8923 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8924 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8925 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8926 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8927 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8928 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8929 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8930 ENTRY(iem.s.DataTlb.aEntries),
8931 ENTRY(iem.s.CodeTlb.uTlbRevision),
8932 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8933 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8934 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8935 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8936 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8937 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8938 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8939 ENTRY(iem.s.CodeTlb.aEntries),
8940 ENTRY(pVMR3),
8941 ENTRY(cpum.GstCtx.rax),
8942 ENTRY(cpum.GstCtx.ah),
8943 ENTRY(cpum.GstCtx.rcx),
8944 ENTRY(cpum.GstCtx.ch),
8945 ENTRY(cpum.GstCtx.rdx),
8946 ENTRY(cpum.GstCtx.dh),
8947 ENTRY(cpum.GstCtx.rbx),
8948 ENTRY(cpum.GstCtx.bh),
8949 ENTRY(cpum.GstCtx.rsp),
8950 ENTRY(cpum.GstCtx.rbp),
8951 ENTRY(cpum.GstCtx.rsi),
8952 ENTRY(cpum.GstCtx.rdi),
8953 ENTRY(cpum.GstCtx.r8),
8954 ENTRY(cpum.GstCtx.r9),
8955 ENTRY(cpum.GstCtx.r10),
8956 ENTRY(cpum.GstCtx.r11),
8957 ENTRY(cpum.GstCtx.r12),
8958 ENTRY(cpum.GstCtx.r13),
8959 ENTRY(cpum.GstCtx.r14),
8960 ENTRY(cpum.GstCtx.r15),
8961 ENTRY(cpum.GstCtx.es.Sel),
8962 ENTRY(cpum.GstCtx.es.u64Base),
8963 ENTRY(cpum.GstCtx.es.u32Limit),
8964 ENTRY(cpum.GstCtx.es.Attr),
8965 ENTRY(cpum.GstCtx.cs.Sel),
8966 ENTRY(cpum.GstCtx.cs.u64Base),
8967 ENTRY(cpum.GstCtx.cs.u32Limit),
8968 ENTRY(cpum.GstCtx.cs.Attr),
8969 ENTRY(cpum.GstCtx.ss.Sel),
8970 ENTRY(cpum.GstCtx.ss.u64Base),
8971 ENTRY(cpum.GstCtx.ss.u32Limit),
8972 ENTRY(cpum.GstCtx.ss.Attr),
8973 ENTRY(cpum.GstCtx.ds.Sel),
8974 ENTRY(cpum.GstCtx.ds.u64Base),
8975 ENTRY(cpum.GstCtx.ds.u32Limit),
8976 ENTRY(cpum.GstCtx.ds.Attr),
8977 ENTRY(cpum.GstCtx.fs.Sel),
8978 ENTRY(cpum.GstCtx.fs.u64Base),
8979 ENTRY(cpum.GstCtx.fs.u32Limit),
8980 ENTRY(cpum.GstCtx.fs.Attr),
8981 ENTRY(cpum.GstCtx.gs.Sel),
8982 ENTRY(cpum.GstCtx.gs.u64Base),
8983 ENTRY(cpum.GstCtx.gs.u32Limit),
8984 ENTRY(cpum.GstCtx.gs.Attr),
8985 ENTRY(cpum.GstCtx.rip),
8986 ENTRY(cpum.GstCtx.eflags),
8987 ENTRY(cpum.GstCtx.uRipInhibitInt),
8988 ENTRY(cpum.GstCtx.cr0),
8989 ENTRY(cpum.GstCtx.cr4),
8990 ENTRY(cpum.GstCtx.aXcr[0]),
8991 ENTRY(cpum.GstCtx.aXcr[1]),
8992#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8993 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8994 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8995 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8996 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8997 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8998 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8999 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
9000 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
9001 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
9002 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
9003 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
9004 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
9005 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
9006 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
9007 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
9008 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
9009 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
9010 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
9011 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
9012 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
9013 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
9014 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
9015 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
9016 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
9017 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
9018 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
9019 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
9020 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
9021 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
9022 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
9023 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
9024 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
9025 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
9026#endif
9027#undef ENTRY
9028 };
9029#ifdef VBOX_STRICT
9030 static bool s_fOrderChecked = false;
9031 if (!s_fOrderChecked)
9032 {
9033 s_fOrderChecked = true;
9034 uint32_t offPrev = s_aMembers[0].off;
9035 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
9036 {
9037 Assert(s_aMembers[i].off > offPrev);
9038 offPrev = s_aMembers[i].off;
9039 }
9040 }
9041#endif
9042
9043 /*
9044 * Binary lookup.
9045 */
9046 unsigned iStart = 0;
9047 unsigned iEnd = RT_ELEMENTS(s_aMembers);
9048 for (;;)
9049 {
9050 unsigned const iCur = iStart + (iEnd - iStart) / 2;
9051 uint32_t const offCur = s_aMembers[iCur].off;
9052 if (off < offCur)
9053 {
9054 if (iCur != iStart)
9055 iEnd = iCur;
9056 else
9057 break;
9058 }
9059 else if (off > offCur)
9060 {
9061 if (iCur + 1 < iEnd)
9062 iStart = iCur + 1;
9063 else
9064 break;
9065 }
9066 else
9067 return s_aMembers[iCur].pszName;
9068 }
9069#ifdef VBOX_WITH_STATISTICS
9070 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9071 return "iem.s.acThreadedFuncStats[iFn]";
9072#endif
9073 return NULL;
9074}
9075
9076
9077/**
9078 * Translates a label to a name.
9079 */
9080static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode /*= false*/)
9081{
9082 switch (enmLabel)
9083 {
9084#define STR_CASE_CMN(a_Label) case kIemNativeLabelType_ ## a_Label: return fCommonCode ? "Chunk_" #a_Label : #a_Label;
9085 STR_CASE_CMN(Invalid);
9086 STR_CASE_CMN(RaiseDe);
9087 STR_CASE_CMN(RaiseUd);
9088 STR_CASE_CMN(RaiseSseRelated);
9089 STR_CASE_CMN(RaiseAvxRelated);
9090 STR_CASE_CMN(RaiseSseAvxFpRelated);
9091 STR_CASE_CMN(RaiseNm);
9092 STR_CASE_CMN(RaiseGp0);
9093 STR_CASE_CMN(RaiseMf);
9094 STR_CASE_CMN(RaiseXf);
9095 STR_CASE_CMN(ObsoleteTb);
9096 STR_CASE_CMN(NeedCsLimChecking);
9097 STR_CASE_CMN(CheckBranchMiss);
9098 STR_CASE_CMN(ReturnSuccess);
9099 STR_CASE_CMN(ReturnBreak);
9100 STR_CASE_CMN(ReturnBreakFF);
9101 STR_CASE_CMN(ReturnWithFlags);
9102 STR_CASE_CMN(ReturnBreakViaLookup);
9103 STR_CASE_CMN(ReturnBreakViaLookupWithIrq);
9104 STR_CASE_CMN(ReturnBreakViaLookupWithTlb);
9105 STR_CASE_CMN(ReturnBreakViaLookupWithTlbAndIrq);
9106 STR_CASE_CMN(NonZeroRetOrPassUp);
9107#undef STR_CASE_CMN
9108#define STR_CASE_LBL(a_Label) case kIemNativeLabelType_ ## a_Label: return #a_Label;
9109 STR_CASE_LBL(LoopJumpTarget);
9110 STR_CASE_LBL(If);
9111 STR_CASE_LBL(Else);
9112 STR_CASE_LBL(Endif);
9113 STR_CASE_LBL(CheckIrq);
9114 STR_CASE_LBL(TlbLookup);
9115 STR_CASE_LBL(TlbMiss);
9116 STR_CASE_LBL(TlbDone);
9117 case kIemNativeLabelType_End: break;
9118 }
9119 return NULL;
9120}
9121
9122
9123 /** Info for the symbol resolver used when disassembling. */
9124typedef struct IEMNATIVDISASMSYMCTX
9125{
9126 PVMCPU pVCpu;
9127 PCIEMTB pTb;
9128 PCIEMNATIVEPERCHUNKCTX pCtx;
9129#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9130 PCIEMTBDBG pDbgInfo;
9131#endif
9132} IEMNATIVDISASMSYMCTX;
9133typedef IEMNATIVDISASMSYMCTX *PIEMNATIVDISASMSYMCTX;
9134
9135
9136/**
9137 * Resolve address to symbol, if we can.
9138 */
9139static const char *iemNativeDisasmGetSymbol(PIEMNATIVDISASMSYMCTX pSymCtx, uintptr_t uAddress, char *pszBuf, size_t cbBuf)
9140{
9141 PCIEMTB const pTb = pSymCtx->pTb;
9142 uintptr_t const offNative = (uAddress - (uintptr_t)pTb->Native.paInstructions) / sizeof(IEMNATIVEINSTR);
9143 if (offNative <= pTb->Native.cInstructions)
9144 {
9145#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9146 /*
9147 * Scan debug info for a matching label.
9148 * Since the debug info should be 100% linear, we can do a binary search here.
9149 */
9150 PCIEMTBDBG const pDbgInfo = pSymCtx->pDbgInfo;
9151 if (pDbgInfo)
9152 {
9153 uint32_t const cEntries = pDbgInfo->cEntries;
9154 uint32_t idxEnd = cEntries;
9155 uint32_t idxStart = 0;
9156 for (;;)
9157 {
9158 /* Find a NativeOffset record close to the midpoint. */
9159 uint32_t idx = idxStart + (idxEnd - idxStart) / 2;
9160 while (idx > idxStart && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9161 idx--;
9162 if (pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9163 {
9164 idx = idxStart + (idxEnd - idxStart) / 2 + 1;
9165 while (idx < idxEnd && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9166 idx++;
9167 if (idx >= idxEnd)
9168 break;
9169 }
9170
9171 /* Do the binary searching thing. */
9172 if (offNative < pDbgInfo->aEntries[idx].NativeOffset.offNative)
9173 {
9174 if (idx > idxStart)
9175 idxEnd = idx;
9176 else
9177 break;
9178 }
9179 else if (offNative > pDbgInfo->aEntries[idx].NativeOffset.offNative)
9180 {
9181 idx += 1;
9182 if (idx < idxEnd)
9183 idxStart = idx;
9184 else
9185 break;
9186 }
9187 else
9188 {
9189 /* Got a matching offset, scan forward till we hit a label, but
9190 stop when the native offset changes. */
9191 while (++idx < cEntries)
9192 switch (pDbgInfo->aEntries[idx].Gen.uType)
9193 {
9194 case kIemTbDbgEntryType_Label:
9195 {
9196 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)pDbgInfo->aEntries[idx].Label.enmLabel;
9197 const char * const pszName = iemNativeGetLabelName(enmLabel);
9198 if (enmLabel < kIemNativeLabelType_FirstWithMultipleInstances)
9199 return pszName;
9200 RTStrPrintf(pszBuf, cbBuf, "%s_%u", pszName, pDbgInfo->aEntries[idx].Label.uData);
9201 return pszBuf;
9202 }
9203
9204 case kIemTbDbgEntryType_NativeOffset:
9205 if (pDbgInfo->aEntries[idx].NativeOffset.offNative != offNative)
9206 return NULL;
9207 break;
9208 }
9209 break;
9210 }
9211 }
9212 }
9213#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9214 }
9215 else
9216 {
9217 PCIEMNATIVEPERCHUNKCTX const pChunkCtx = pSymCtx->pCtx;
9218 if (pChunkCtx)
9219 for (uint32_t i = 1; i < RT_ELEMENTS(pChunkCtx->apExitLabels); i++)
9220 if ((PIEMNATIVEINSTR)uAddress == pChunkCtx->apExitLabels[i])
9221 return iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true /*fCommonCode*/);
9222 }
9223 RT_NOREF(pszBuf, cbBuf);
9224 return NULL;
9225}
9226
9227#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9228
9229/**
9230 * @callback_method_impl{FNDISGETSYMBOL}
9231 */
9232static DECLCALLBACK(int) iemNativeDisasmGetSymbolCb(PCDISSTATE pDis, uint32_t u32Sel, RTUINTPTR uAddress,
9233 char *pszBuf, size_t cchBuf, RTINTPTR *poff, void *pvUser)
9234{
9235 const char * const pszSym = iemNativeDisasmGetSymbol((PIEMNATIVDISASMSYMCTX)pvUser, uAddress, pszBuf, cchBuf);
9236 if (pszSym)
9237 {
9238 *poff = 0;
9239 if (pszSym != pszBuf)
9240 return RTStrCopy(pszBuf, cchBuf, pszSym);
9241 return VINF_SUCCESS;
9242 }
9243 RT_NOREF(pDis, u32Sel);
9244 return VERR_SYMBOL_NOT_FOUND;
9245}
9246
9247#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9248
9249/**
9250 * Annotates an instruction decoded by the capstone disassembler.
9251 */
9252static const char *
9253iemNativeDisasmAnnotateCapstone(PIEMNATIVDISASMSYMCTX pSymCtx, cs_insn const *pInstr, char *pszBuf, size_t cchBuf)
9254{
9255# if defined(RT_ARCH_ARM64)
9256 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9257 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9258 {
9259 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9260 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9261 char const *psz = strchr(pInstr->op_str, '[');
9262 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9263 {
9264 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
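            /* x28 is the fixed pVCpu register, so the displacement is already a VMCPU offset;
               x27 is the fixed pCpumCtx register, hence the added cpum.GstCtx offset. */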
9265 int32_t off = -1;
9266 psz += 4;
9267 if (*psz == ']')
9268 off = 0;
9269 else if (*psz == ',')
9270 {
9271 psz = RTStrStripL(psz + 1);
9272 if (*psz == '#')
9273 off = RTStrToInt32(&psz[1]);
9274 /** @todo deal with index registers and LSL as well... */
9275 }
9276 if (off >= 0)
9277 return iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9278 }
9279 }
9280 else if (pInstr->id == ARM64_INS_B || pInstr->id == ARM64_INS_BL)
9281 {
9282 const char *pszAddr = strchr(pInstr->op_str, '#');
9283 if (pszAddr)
9284 {
9285 uint64_t uAddr = RTStrToUInt64(pszAddr + 1);
9286 if (uAddr != 0)
9287 return iemNativeDisasmGetSymbol(pSymCtx, uAddr, pszBuf, cchBuf);
9288 }
9289 }
9290# endif
9291 RT_NOREF(pSymCtx, pInstr, pszBuf, cchBuf);
9292 return NULL;
9293}
9294#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9295
9296
9297DECLHIDDEN(void) iemNativeDisassembleTb(PVMCPU pVCpu, PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9298{
9299 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9300#if defined(RT_ARCH_AMD64)
9301 static const char * const a_apszMarkers[] =
9302 {
9303 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9304 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9305 };
9306#endif
9307
9308 char szDisBuf[512];
9309 DISSTATE Dis;
9310 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9311 uint32_t const cNative = pTb->Native.cInstructions;
9312 uint32_t offNative = 0;
9313#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9314 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9315#endif
9316 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9317 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9318 : DISCPUMODE_64BIT;
9319#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9320 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb), pDbgInfo };
9321#else
9322 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb) };
9323#endif
9324#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9325 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9326#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9327 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9328#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9329# error "Port me"
9330#else
9331 csh hDisasm = ~(size_t)0;
9332# if defined(RT_ARCH_AMD64)
9333 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9334# elif defined(RT_ARCH_ARM64)
9335 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9336# else
9337# error "Port me"
9338# endif
9339 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9340
9341 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9342 //Assert(rcCs == CS_ERR_OK);
9343#endif
9344
9345 /*
9346 * Print TB info.
9347 */
9348 pHlp->pfnPrintf(pHlp,
9349 "pTb=%p: GCPhysPc=%RGp (%%%RGv) cInstructions=%u LB %#x cRanges=%u\n"
9350 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9351 pTb, pTb->GCPhysPc,
9352#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9353 pTb->pDbgInfo ? pTb->pDbgInfo->FlatPc : RTGCPTR_MAX,
9354#else
9355 pTb->FlatPc,
9356#endif
9357 pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9358 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9359#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9360 if (pDbgInfo && pDbgInfo->cEntries > 1)
9361 {
9362 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9363
9364 /*
9365 * This disassembly is driven by the debug info which follows the native
9366 * code and indicates where the next guest instruction starts, where the
9367 * labels are, and such things.
9368 */
9369 uint32_t idxThreadedCall = 0;
9370 uint32_t idxGuestInstr = 0;
9371 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9372 uint8_t idxRange = UINT8_MAX;
9373 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9374 uint32_t offRange = 0;
9375 uint32_t offOpcodes = 0;
9376 uint32_t const cbOpcodes = pTb->cbOpcodes;
9377 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9378 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9379 uint32_t iDbgEntry = 1;
9380 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9381
9382 while (offNative < cNative)
9383 {
9384 /* If we're at or have passed the point where the next chunk of debug
9385 info starts, process it. */
9386 if (offDbgNativeNext <= offNative)
9387 {
9388 offDbgNativeNext = UINT32_MAX;
9389 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9390 {
9391 switch ((IEMTBDBGENTRYTYPE)pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9392 {
9393 case kIemTbDbgEntryType_GuestInstruction:
9394 {
9395 /* Did the exec flag change? */
9396 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9397 {
9398 pHlp->pfnPrintf(pHlp,
9399 " fExec change %#08x -> %#08x %s\n",
9400 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9401 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9402 szDisBuf, sizeof(szDisBuf)));
9403 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9404 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9405 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9406 : DISCPUMODE_64BIT;
9407 }
9408
9409 /* New opcode range? We need to fend off a spurious debug info entry here for cases
9410 where the compilation was aborted before the opcode was recorded and the actual
9411 instruction was translated to a threaded call. This may happen when we run out
9412 of ranges, or when some complicated interrupts/FFs are found to be pending or
9413 similar. So, we just deal with it here rather than in the compiler code as it
9414 is a lot simpler to do here. */
9415 if ( idxRange == UINT8_MAX
9416 || idxRange >= cRanges
9417 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9418 {
9419 idxRange += 1;
9420 if (idxRange < cRanges)
9421 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9422 else
9423 continue;
9424 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9425 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9426 + (pTb->aRanges[idxRange].idxPhysPage == 0
9427 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9428 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9429 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9430 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9431 pTb->aRanges[idxRange].idxPhysPage);
9432 GCPhysPc += offRange;
9433 }
9434
9435 /* Disassemble the instruction. */
9436 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9437 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9438 uint32_t cbInstr = 1;
9439 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9440 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9441 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9442 if (RT_SUCCESS(rc))
9443 {
9444 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9445 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9446 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9447 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9448
9449 static unsigned const s_offMarker = 55;
9450 static char const s_szMarker[] = " ; <--- guest";
9451 if (cch < s_offMarker)
9452 {
9453 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9454 cch = s_offMarker;
9455 }
9456 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9457 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9458
9459 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %s #%u\n", GCPhysPc, szDisBuf, idxGuestInstr);
9460 }
9461 else
9462 {
9463 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9464 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9465 cbInstr = 1;
9466 }
9467 idxGuestInstr++;
9468 GCPhysPc += cbInstr;
9469 offOpcodes += cbInstr;
9470 offRange += cbInstr;
9471 continue;
9472 }
9473
9474 case kIemTbDbgEntryType_ThreadedCall:
9475 pHlp->pfnPrintf(pHlp,
9476 " Call #%u to %s (%u args) - %s\n",
9477 idxThreadedCall,
9478 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9479 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9480 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9481 idxThreadedCall++;
9482 continue;
9483
9484 case kIemTbDbgEntryType_GuestRegShadowing:
9485 {
9486 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9487 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9488 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9489 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9490 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9491 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9492 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9493 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9494 else
9495 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9496 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9497 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9498 continue;
9499 }
9500
9501# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9502 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9503 {
9504 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9505 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9506 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9507 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9508 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9509 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9510 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9511 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9512 else
9513 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9514 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9515 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9516 continue;
9517 }
9518# endif
9519
9520 case kIemTbDbgEntryType_Label:
9521 {
9522 const char *pszName = iemNativeGetLabelName((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel);
9523 if (pDbgInfo->aEntries[iDbgEntry].Label.enmLabel >= kIemNativeLabelType_FirstWithMultipleInstances)
9524 {
9525 const char *pszComment = pDbgInfo->aEntries[iDbgEntry].Label.enmLabel == kIemNativeLabelType_Else
9526 ? " ; regs state restored pre-if-block" : "";
9527 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9528 }
9529 else
9530 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9531 continue;
9532 }
9533
9534 case kIemTbDbgEntryType_NativeOffset:
9535 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9536 Assert(offDbgNativeNext >= offNative);
9537 break;
9538
9539# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9540 case kIemTbDbgEntryType_DelayedPcUpdate:
9541 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9542 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9543 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9544 continue;
9545# endif
9546
9547# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9548 case kIemTbDbgEntryType_GuestRegDirty:
9549 {
9550 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9551 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9552 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9553 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9554 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9555 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9556 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9557 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9558 pszGstReg, pszHstReg);
9559 continue;
9560 }
9561
9562 case kIemTbDbgEntryType_GuestRegWriteback:
9563 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX32)\n",
9564 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9565 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9566 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9567 continue;
9568# endif
9569
9570# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9571 case kIemTbDbgEntryType_PostponedEFlagsCalc:
9572 {
9573 const char *pszOp = "!unknown!";
9574 switch ((IEMNATIVE_POSTPONED_EFL_OP_T)pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.enmOp)
9575 {
9576 case kIemNativePostponedEflOp_Logical: pszOp = "logical"; break;
9577 case kIemNativePostponedEflOp_Invalid: break;
9578 case kIemNativePostponedEflOp_End: break;
9579 }
9580 pHlp->pfnPrintf(pHlp, " Postponed EFLAGS calc #%u: %s %u bits\n",
9581 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.idxEmit, pszOp,
9582 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.cOpBits);
9583 continue;
9584 }
9585# endif
9586 default:
9587 AssertFailed();
9588 continue;
9589 }
9590 /* Break out of the loop at kIemTbDbgEntryType_NativeOffset. */
9591 iDbgEntry++;
9592 break;
9593 }
9594 }
9595
9596 /*
9597 * Disassemble the next native instruction.
9598 */
9599 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9600# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9601 uint32_t cbInstr = sizeof(paNative[0]);
9602 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9603 if (RT_SUCCESS(rc))
9604 {
9605# if defined(RT_ARCH_AMD64)
9606 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9607 {
9608 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9609 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9610 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9611 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9612 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9613 uInfo & 0x8000 ? "recompiled" : "todo");
9614 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9615 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9616 else
9617 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9618 }
9619 else
9620# endif
9621 {
9622 const char *pszAnnotation = NULL;
9623# ifdef RT_ARCH_AMD64
9624 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9625 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9626 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9627 iemNativeDisasmGetSymbolCb, &SymCtx);
9628 PCDISOPPARAM pMemOp;
9629 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[0].fUse))
9630 pMemOp = &Dis.aParams[0];
9631 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[1].fUse))
9632 pMemOp = &Dis.aParams[1];
9633 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[2].fUse))
9634 pMemOp = &Dis.aParams[2];
9635 else
9636 pMemOp = NULL;
9637 if ( pMemOp
9638 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9639 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9640 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9641 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9642
9643# elif defined(RT_ARCH_ARM64)
9644 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9645 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9646 iemNativeDisasmGetSymbolCb, &SymCtx);
9647# else
9648# error "Port me"
9649# endif
9650 if (pszAnnotation)
9651 {
9652 static unsigned const s_offAnnotation = 55;
9653 size_t const cchAnnotation = strlen(pszAnnotation);
9654 size_t cchDis = strlen(szDisBuf);
9655 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9656 {
9657 if (cchDis < s_offAnnotation)
9658 {
9659 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9660 cchDis = s_offAnnotation;
9661 }
9662 szDisBuf[cchDis++] = ' ';
9663 szDisBuf[cchDis++] = ';';
9664 szDisBuf[cchDis++] = ' ';
9665 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9666 }
9667 }
9668 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9669 }
9670 }
9671 else
9672 {
9673# if defined(RT_ARCH_AMD64)
9674 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9675 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9676# elif defined(RT_ARCH_ARM64)
9677 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9678# else
9679# error "Port me"
9680# endif
9681 cbInstr = sizeof(paNative[0]);
9682 }
9683 offNative += cbInstr / sizeof(paNative[0]);
9684
9685# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9686 cs_insn *pInstr;
9687 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9688 (uintptr_t)pNativeCur, 1, &pInstr);
9689 if (cInstrs > 0)
9690 {
9691 Assert(cInstrs == 1);
9692 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9693 size_t const cchOp = strlen(pInstr->op_str);
9694# if defined(RT_ARCH_AMD64)
9695 if (pszAnnotation)
9696 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9697 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9698 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9699 else
9700 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9701 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9702
9703# else
9704 if (pszAnnotation)
9705 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9706 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9707 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9708 else
9709 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9710 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9711# endif
9712 offNative += pInstr->size / sizeof(*pNativeCur);
9713 cs_free(pInstr, cInstrs);
9714 }
9715 else
9716 {
9717# if defined(RT_ARCH_AMD64)
9718 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9719 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9720# else
9721 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9722# endif
9723 offNative++;
9724 }
9725# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9726 }
9727 }
9728 else
9729#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9730 {
9731 /*
9732 * No debug info, just disassemble the x86 code and then the native code.
9733 *
9734 * First the guest code:
9735 */
9736 for (unsigned i = 0; i < pTb->cRanges; i++)
9737 {
9738 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9739 + (pTb->aRanges[i].idxPhysPage == 0
9740 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9741 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9742 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9743 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9744 unsigned off = pTb->aRanges[i].offOpcodes;
9745 /** @todo this ain't working when crossing pages! */
9746 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9747 while (off < cbOpcodes)
9748 {
9749 uint32_t cbInstr = 1;
9750 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9751 &pTb->pabOpcodes[off], cbOpcodes - off,
9752 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9753 if (RT_SUCCESS(rc))
9754 {
9755 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9756 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9757 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9758 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9759 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9760 GCPhysPc += cbInstr;
9761 off += cbInstr;
9762 }
9763 else
9764 {
9765 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - disassembly failure %Rrc\n",
9766 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9767 break;
9768 }
9769 }
9770 }
9771
9772 /*
9773 * Then the native code:
9774 */
9775 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9776 while (offNative < cNative)
9777 {
9778 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9779#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9780 uint32_t cbInstr = sizeof(paNative[0]);
9781 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9782 if (RT_SUCCESS(rc))
9783 {
9784# if defined(RT_ARCH_AMD64)
9785 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9786 {
9787 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9788 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9789 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9790 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9791 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9792 uInfo & 0x8000 ? "recompiled" : "todo");
9793 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9794 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9795 else
9796 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9797 }
9798 else
9799# endif
9800 {
9801# ifdef RT_ARCH_AMD64
9802 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9803 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9804 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9805 iemNativeDisasmGetSymbolCb, &SymCtx);
9806# elif defined(RT_ARCH_ARM64)
9807 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9808 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9809 iemNativeDisasmGetSymbolCb, &SymCtx);
9810# else
9811# error "Port me"
9812# endif
9813 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9814 }
9815 }
9816 else
9817 {
9818# if defined(RT_ARCH_AMD64)
9819 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9820 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9821# else
9822 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9823# endif
9824 cbInstr = sizeof(paNative[0]);
9825 }
9826 offNative += cbInstr / sizeof(paNative[0]);
9827
9828#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9829 cs_insn *pInstr;
9830 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9831 (uintptr_t)pNativeCur, 1, &pInstr);
9832 if (cInstrs > 0)
9833 {
9834 Assert(cInstrs == 1);
9835 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9836 size_t const cchOp = strlen(pInstr->op_str);
9837# if defined(RT_ARCH_AMD64)
9838 if (pszAnnotation)
9839 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9840 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9841 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9842 else
9843 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9844 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9845
9846# else
9847 if (pszAnnotation)
9848 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9849 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9850 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9851 else
9852 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9853 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9854# endif
9855 offNative += pInstr->size / sizeof(*pNativeCur);
9856 cs_free(pInstr, cInstrs);
9857 }
9858 else
9859 {
9860# if defined(RT_ARCH_AMD64)
9861 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9862 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9863# else
9864 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9865# endif
9866 offNative++;
9867 }
9868#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9869 }
9870 }
9871
9872#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9873 /* Cleanup. */
9874 cs_close(&hDisasm);
9875#endif
9876}
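/* Note: the TB disassembly above has two alternative backends for the native
   code: by default VBox's own DIS API (DISInstr plus DISFormatYasmEx on AMD64
   and DISFormatArmV8Ex on ARM64), and, when VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
   is defined, the capstone library (cs_disasm); the guest x86 code in the
   no-debug-info fallback is always decoded via DISInstrWithPrefetchedBytes. */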
9877
9878
9879/** Emit alignment padding between labels / functions. */
9880DECL_INLINE_THROW(uint32_t)
9881iemNativeRecompileEmitAlignmentPadding(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fAlignMask)
9882{
9883 if (off & fAlignMask)
9884 {
9885 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, fAlignMask + 1);
9886 while (off & fAlignMask)
9887#if defined(RT_ARCH_AMD64)
9888 pCodeBuf[off++] = 0xcc;
9889#elif defined(RT_ARCH_ARM64)
9890 pCodeBuf[off++] = Armv8A64MkInstrBrk(0xcccc);
9891#else
9892# error "port me"
9893#endif
9894 }
9895 return off;
9896}
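/* Worked example (illustrative): with fAlignMask = 15 on AMD64, an offset of
   0x23 gets 13 int3 (0xcc) filler bytes so the next label starts at 0x30, a
   16-byte boundary. On ARM64 the mask is given in 4-byte instruction units
   (31 / 4 below), so labels end up aligned to 32 bytes using brk fillers. */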
9897
9898
9899/**
9900 * Called when a new chunk is allocated, to emit the common per-chunk code.
9901 *
9902 * Allocates a per-chunk context directly from the chunk itself and places the
9903 * common code there.
9904 *
9905 * @returns VBox status code.
9906 * @param pVCpu The cross context virtual CPU structure of the calling
9907 * thread.
9908 * @param idxChunk The index of the chunk being added and requiring a
9909 * common code context.
9910 * @param ppCtx Where to return the pointer to the chunk context start.
9911 */
9912DECLHIDDEN(int) iemNativeRecompileAttachExecMemChunkCtx(PVMCPU pVCpu, uint32_t idxChunk, PCIEMNATIVEPERCHUNKCTX *ppCtx)
9913{
9914 *ppCtx = NULL;
9915
9916 /*
9917 * Allocate a new recompiler state (since we're likely to be called while
9918 * the default one is fully loaded already with a recompiled TB).
9919 *
9920 * This is a bit of overkill, but this isn't a frequently used code path.
9921 */
9922 PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, NULL);
9923 AssertReturn(pReNative, VERR_NO_MEMORY);
9924
9925#if defined(RT_ARCH_AMD64)
9926 uint32_t const fAlignMask = 15;
9927#elif defined(RT_ARCH_ARM64)
9928 uint32_t const fAlignMask = 31 / 4;
9929#else
9930# error "port me"
9931#endif
9932 uint32_t aoffLabels[kIemNativeLabelType_LastTbExit + 1] = {0};
9933 int rc = VINF_SUCCESS;
9934 uint32_t off = 0;
9935
9936 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9937 {
9938 /*
9939 * Emit the epilog code.
9940 */
9941 aoffLabels[kIemNativeLabelType_ReturnSuccess] = off;
9942 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
9943 uint32_t const offReturnWithStatus = off;
9944 off = iemNativeEmitCoreEpilog(pReNative, off);
9945
9946 /*
9947 * Generate special jump labels. All of these get a copy of the epilog code.
9948 */
9949 static struct
9950 {
9951 IEMNATIVELABELTYPE enmExitReason;
9952 uint32_t (*pfnEmitCore)(PIEMRECOMPILERSTATE pReNative, uint32_t off);
9953 } const s_aSpecialWithEpilogs[] =
9954 {
9955 { kIemNativeLabelType_NonZeroRetOrPassUp, iemNativeEmitCoreRcFiddling },
9956 { kIemNativeLabelType_ReturnBreak, iemNativeEmitCoreReturnBreak },
9957 { kIemNativeLabelType_ReturnBreakFF, iemNativeEmitCoreReturnBreakFF },
9958 { kIemNativeLabelType_ReturnWithFlags, iemNativeEmitCoreReturnWithFlags },
9959 };
9960 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSpecialWithEpilogs); i++)
9961 {
9962 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9963 Assert(aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] == 0);
9964 aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] = off;
9965 off = s_aSpecialWithEpilogs[i].pfnEmitCore(pReNative, off);
9966 off = iemNativeEmitCoreEpilog(pReNative, off);
9967 }
9968
9969 /*
9970 * Do what iemNativeEmitReturnBreakViaLookup does.
9971 */
9972 static struct
9973 {
9974 IEMNATIVELABELTYPE enmExitReason;
9975 uintptr_t pfnHelper;
9976 } const s_aViaLookup[] =
9977 {
9978 { kIemNativeLabelType_ReturnBreakViaLookup,
9979 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/> },
9980 { kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
9981 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/> },
9982 { kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
9983 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/> },
9984 { kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
9985 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/> },
9986 };
9987 uint32_t const offReturnBreak = aoffLabels[kIemNativeLabelType_ReturnBreak]; Assert(offReturnBreak != 0);
9988 for (uint32_t i = 0; i < RT_ELEMENTS(s_aViaLookup); i++)
9989 {
9990 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9991 Assert(aoffLabels[s_aViaLookup[i].enmExitReason] == 0);
9992 aoffLabels[s_aViaLookup[i].enmExitReason] = off;
9993 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, s_aViaLookup[i].pfnHelper);
9994 }
9995
9996 /*
9997 * Generate simple TB tail labels that just call a helper with a pVCpu
9998 * argument and either return or longjmp/throw a non-zero status.
9999 */
10000 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
10001 static struct
10002 {
10003 IEMNATIVELABELTYPE enmExitReason;
10004 bool fWithEpilog;
10005 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
10006 } const s_aSimpleTailLabels[] =
10007 {
10008 { kIemNativeLabelType_RaiseDe, false, iemNativeHlpExecRaiseDe },
10009 { kIemNativeLabelType_RaiseUd, false, iemNativeHlpExecRaiseUd },
10010 { kIemNativeLabelType_RaiseSseRelated, false, iemNativeHlpExecRaiseSseRelated },
10011 { kIemNativeLabelType_RaiseAvxRelated, false, iemNativeHlpExecRaiseAvxRelated },
10012 { kIemNativeLabelType_RaiseSseAvxFpRelated, false, iemNativeHlpExecRaiseSseAvxFpRelated },
10013 { kIemNativeLabelType_RaiseNm, false, iemNativeHlpExecRaiseNm },
10014 { kIemNativeLabelType_RaiseGp0, false, iemNativeHlpExecRaiseGp0 },
10015 { kIemNativeLabelType_RaiseMf, false, iemNativeHlpExecRaiseMf },
10016 { kIemNativeLabelType_RaiseXf, false, iemNativeHlpExecRaiseXf },
10017 { kIemNativeLabelType_ObsoleteTb, true, iemNativeHlpObsoleteTb },
10018 { kIemNativeLabelType_NeedCsLimChecking, true, iemNativeHlpNeedCsLimChecking },
10019 { kIemNativeLabelType_CheckBranchMiss, true, iemNativeHlpCheckBranchMiss },
10020 };
10021 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSimpleTailLabels); i++)
10022 {
10023 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
10024 Assert(!aoffLabels[s_aSimpleTailLabels[i].enmExitReason]);
10025 aoffLabels[s_aSimpleTailLabels[i].enmExitReason] = off;
10026
10027 /* int pfnCallback(PVMCPUCC pVCpu) */
10028 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10029 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)s_aSimpleTailLabels[i].pfnCallback);
10030
10031 /* If the callback is supposed to return with a status code we inline the epilog
10032 sequence for better speed. Otherwise, if the callback shouldn't return because
10033 it throws/longjmps, we just jump to the return sequence to be on the safe side. */
10034 if (s_aSimpleTailLabels[i].fWithEpilog)
10035 off = iemNativeEmitCoreEpilog(pReNative, off);
10036 else
10037 {
10038#ifdef VBOX_STRICT
10039 off = iemNativeEmitBrk(pReNative, off, 0x2201);
10040#endif
10041 off = iemNativeEmitJmpToFixed(pReNative, off, offReturnWithStatus);
10042 }
10043 }
10044
10045
10046#ifdef VBOX_STRICT
10047 /* Make sure we've generated code for all labels. */
10048 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(aoffLabels); i++)
10049 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
10050#endif
10051 }
10052 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10053 {
10054 Log(("iemNativeRecompileAttachExecMemChunkCtx: Caught %Rrc while recompiling!\n", rc));
10055 iemNativeTerm(pReNative);
10056 return rc;
10057 }
10058 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10059
10060 /*
10061 * Allocate memory for the context (first) and the common code (last).
10062 */
10063 PIEMNATIVEPERCHUNKCTX pCtx;
10064 uint32_t const cbCtx = RT_ALIGN_32(sizeof(*pCtx), 64);
10065 uint32_t const cbCode = off * sizeof(IEMNATIVEINSTR);
10066 PIEMNATIVEINSTR paFinalCommonCodeRx = NULL;
10067 pCtx = (PIEMNATIVEPERCHUNKCTX)iemExecMemAllocatorAllocFromChunk(pVCpu, idxChunk, cbCtx + cbCode, &paFinalCommonCodeRx);
10068 AssertLogRelMsgReturnStmt(pCtx, ("cbCtx=%#x cbCode=%#x idxChunk=%#x\n", cbCtx, cbCode, idxChunk),
10069 iemNativeTerm(pReNative), VERR_OUT_OF_RESOURCES);
10070
10071 /*
10072 * Copy over the generated code.
10073 * There should be no fixups or labels defined here.
10074 */
10075 paFinalCommonCodeRx = (PIEMNATIVEINSTR)((uintptr_t)paFinalCommonCodeRx + cbCtx);
10076 memcpy((PIEMNATIVEINSTR)((uintptr_t)pCtx + cbCtx), pReNative->pInstrBuf, cbCode);
10077
10078 Assert(pReNative->cFixups == 0);
10079 Assert(pReNative->cLabels == 0);
10080
10081 /*
10082 * Initialize the context.
10083 */
10084 AssertCompile(kIemNativeLabelType_Invalid == 0);
10085 AssertCompile(RT_ELEMENTS(pCtx->apExitLabels) == RT_ELEMENTS(aoffLabels));
10086 pCtx->apExitLabels[kIemNativeLabelType_Invalid] = 0;
10087 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(pCtx->apExitLabels); i++)
10088 {
10089 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
10090 pCtx->apExitLabels[i] = &paFinalCommonCodeRx[aoffLabels[i]];
10091 Log10((" apExitLabels[%u]=%p %s\n", i, pCtx->apExitLabels[i], iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true)));
10092 }
10093
10094 iemExecMemAllocatorReadyForUse(pVCpu, pCtx, cbCtx + cbCode);
10095
10096 iemNativeTerm(pReNative);
10097 *ppCtx = pCtx;
10098 return VINF_SUCCESS;
10099}
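/* Rough layout of what the code above places at the start of a chunk (sketch):
   a 64-byte aligned IEMNATIVEPERCHUNKCTX holding apExitLabels[], immediately
   followed by the common code itself - the ReturnSuccess epilog, the
   rc-fiddling/break/break-FF/with-flags epilogs, the return-break-via-lookup
   stubs and the simple tail-label helpers - each aligned via
   iemNativeRecompileEmitAlignmentPadding. TBs allocated from the same chunk
   branch straight to these labels via pCtx->apExitLabels[] (see the TB exit
   fixups in iemNativeRecompile below). */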
10100
10101
10102/**
10103 * Recompiles the given threaded TB into a native one.
10104 *
10105 * In case of failure the translation block will be returned as-is.
10106 *
10107 * @returns pTb.
10108 * @param pVCpu The cross context virtual CPU structure of the calling
10109 * thread.
10110 * @param pTb The threaded translation block to recompile to native.
10111 */
10112IEM_DECL_MSC_GUARD_IGNORE DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
10113{
10114#if 0 /* For profiling the native recompiler code. */
10115l_profile_again:
10116#endif
10117 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
10118
10119 /*
10120 * The first time thru, we allocate the recompiler state and save it;
10121 * all the other times we'll just reuse the saved one after a quick reset.
10122 */
10123 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
10124 if (RT_LIKELY(pReNative))
10125 iemNativeReInit(pReNative, pTb);
10126 else
10127 {
10128 pReNative = iemNativeInit(pVCpu, pTb);
10129 AssertReturn(pReNative, pTb);
10130 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative; /* save it */
10131 }
10132
10133#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10134 /*
10135 * First do liveness analysis. This is done backwards.
10136 */
10137 {
10138 uint32_t idxCall = pTb->Thrd.cCalls;
10139 if (idxCall <= pReNative->cLivenessEntriesAlloc)
10140 { /* likely */ }
10141 else
10142 {
10143 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
10144 while (idxCall > cAlloc)
10145 cAlloc *= 2;
10146 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
10147 AssertReturn(pvNew, pTb);
10148 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
10149 pReNative->cLivenessEntriesAlloc = cAlloc;
10150 }
10151 AssertReturn(idxCall > 0, pTb);
10152 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
10153
10154 /* The initial (final) entry. */
10155 idxCall--;
10156 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
10157
10158 /* Loop backwards thru the calls and fill in the other entries. */
10159 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
10160 while (idxCall > 0)
10161 {
10162 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
10163 Assert(pfnLiveness);
10164 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
10165 pCallEntry--;
10166 idxCall--;
10167 }
10168 }
10169#endif
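 /* Sketch of the backward pass above: for a TB with three calls, entry #2 is
    initialized as all-unused, the liveness function of call #2 then derives
    entry #1 from it, and call #1 derives entry #0, so paLivenessEntries[i]
    roughly describes which guest state the calls after call #i still need by
    the time call #i gets recompiled below. */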
10170
10171 /*
10172 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10173 * for aborting if an error happens.
10174 */
10175 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10176#ifdef LOG_ENABLED
10177 uint32_t const cCallsOrg = cCallsLeft;
10178#endif
10179 uint32_t off = 0;
10180 int rc = VINF_SUCCESS;
10181 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10182 {
10183 /*
10184 * Convert the calls to native code.
10185 */
10186#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10187 int32_t iGstInstr = -1;
10188#endif
10189#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10190 uint32_t cThreadedCalls = 0;
10191 uint32_t cRecompiledCalls = 0;
10192#endif
10193#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10194 uint32_t idxCurCall = 0;
10195#endif
10196 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10197 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10198 while (cCallsLeft-- > 0)
10199 {
10200 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10201#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10202 pReNative->idxCurCall = idxCurCall;
10203#endif
10204
10205#ifdef IEM_WITH_INTRA_TB_JUMPS
10206 /*
10207 * Define label for jump targets (currently only the first entry).
10208 */
10209 if (!(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET))
10210 { /* likely */ }
10211 else
10212 {
10213 iemNativeLabelCreate(pReNative, kIemNativeLabelType_LoopJumpTarget, off);
10214 Assert(idxCurCall == 0); /** @todo when jumping elsewhere, we have to save the register state. */
10215 }
10216#endif
10217
10218 /*
10219 * Debug info, assembly markup and statistics.
10220 */
10221#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10222 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10223 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10224#endif
10225#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10226 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10227 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10228 {
10229 if (iGstInstr < (int32_t)pTb->cInstructions)
10230 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10231 else
10232 Assert(iGstInstr == pTb->cInstructions);
10233 iGstInstr = pCallEntry->idxInstr;
10234 }
10235 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10236#endif
10237#if defined(VBOX_STRICT)
10238 off = iemNativeEmitMarker(pReNative, off,
10239 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
10240#endif
10241#if defined(VBOX_STRICT)
10242 iemNativeRegAssertSanity(pReNative);
10243#endif
10244#ifdef VBOX_WITH_STATISTICS
10245 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10246#endif
10247
10248#if 0
10249 if ( pTb->GCPhysPc == 0x00000000000c1240
10250 && idxCurCall == 67)
10251 off = iemNativeEmitBrk(pReNative, off, 0xf000);
10252#endif
10253
10254 /*
10255 * Actual work.
10256 */
10257 Log2(("%u[%u]: %s%s (off=%#x)\n", idxCurCall, pCallEntry->idxInstr,
10258 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)", off));
10259 if (pfnRecom) /** @todo stats on this. */
10260 {
10261 off = pfnRecom(pReNative, off, pCallEntry);
10262 STAM_REL_STATS({cRecompiledCalls++;});
10263 }
10264 else
10265 {
10266 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10267 STAM_REL_STATS({cThreadedCalls++;});
10268 }
10269 Assert(off <= pReNative->cInstrBufAlloc);
10270 Assert(pReNative->cCondDepth == 0);
10271
10272#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10273 if (LogIs2Enabled())
10274 {
10275 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10276# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10277 static const char s_achState[] = "CUXI";
10278# else
10279 /* 0123 4567 89ab cdef */
10280 /* CCCC CCCC */
10281 /* WWWW WWWW */
10282 /* RR RR RR RR */
10283 /* P P P P P P P P */
10284 static const char s_achState[] = "UxRr" "WwMm" "CcQq" "KkNn";
10285# endif
10286
10287 char szGpr[17];
10288 for (unsigned i = 0; i < 16; i++)
10289 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10290 szGpr[16] = '\0';
10291
10292 char szSegBase[X86_SREG_COUNT + 1];
10293 char szSegLimit[X86_SREG_COUNT + 1];
10294 char szSegAttrib[X86_SREG_COUNT + 1];
10295 char szSegSel[X86_SREG_COUNT + 1];
10296 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10297 {
10298 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10299 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10300 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10301 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10302 }
10303 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10304 = szSegSel[X86_SREG_COUNT] = '\0';
10305
10306 char szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT + 1];
10307 for (unsigned i = 0; i < IEMLIVENESSBIT_IDX_EFL_COUNT; i++)
10308 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10309 szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT] = '\0';
10310
10311 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10312 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10313 }
10314#endif
10315
10316 /*
10317 * Advance.
10318 */
10319 pCallEntry++;
10320#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10321 idxCurCall++;
10322#endif
10323 }
10324
10325 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10326 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10327 if (!cThreadedCalls)
10328 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10329
10330 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, UINT32_MAX);
10331
10332#ifdef VBOX_WITH_STATISTICS
10333 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
10334#endif
10335
10336 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
10337 off = iemNativeRegFlushPendingWrites(pReNative, off);
10338
10339 /*
10340 * Jump to the common per-chunk epilog code.
10341 */
10342 //off = iemNativeEmitBrk(pReNative, off, 0x1227);
10343 off = iemNativeEmitTbExit<kIemNativeLabelType_ReturnSuccess, true, false>(pReNative, off);
10344
10345 /*
10346 * Generate tail labels with jumps to the common per-chunk code on non-x86 hosts.
10347 */
10348#ifndef RT_ARCH_AMD64
10349 Assert(!(pReNative->bmLabelTypes & ( RT_BIT_64(kIemNativeLabelType_ReturnSuccess)
10350 | RT_BIT_64(kIemNativeLabelType_Invalid) )));
10351 AssertCompile(kIemNativeLabelType_Invalid == 0);
10352 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1U) - 2U);
10353 if (fTailLabels)
10354 {
10355 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, kIemNativeLabelType_LastTbExit + 1);
10356 do
10357 {
10358 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10359 fTailLabels &= ~RT_BIT_64(enmLabel);
10360
10361 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10362 AssertContinue(idxLabel != UINT32_MAX);
10363 iemNativeLabelDefine(pReNative, idxLabel, off);
10364
10365 iemNativeAddTbExitFixup(pReNative, off, enmLabel);
10366# ifdef RT_ARCH_ARM64
10367 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
10368# else
10369# error "port me"
10370# endif
10371 } while (fTailLabels);
10372 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10373 }
10374#else
10375 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1) - 1U))); /* Should not be used! */
10376#endif
10377 }
10378 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10379 {
10380 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10381 return pTb;
10382 }
10383 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10384 Assert(off <= pReNative->cInstrBufAlloc);
10385
10386 /*
10387 * Make sure all labels have been defined.
10388 */
10389 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10390#ifdef VBOX_STRICT
10391 uint32_t const cLabels = pReNative->cLabels;
10392 for (uint32_t i = 0; i < cLabels; i++)
10393 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10394#endif
10395
10396#if 0 /* For profiling the native recompiler code. */
10397 if (pTb->Thrd.cCalls >= 136)
10398 {
10399 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10400 goto l_profile_again;
10401 }
10402#endif
10403
10404 /*
10405 * Allocate executable memory, copy over the code we've generated.
10406 */
10407 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10408 if (pTbAllocator->pDelayedFreeHead)
10409 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10410
10411 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
10412 PCIEMNATIVEPERCHUNKCTX pCtx = NULL;
10413 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10414 &paFinalInstrBufRx, &pCtx);
10415
10416 AssertReturn(paFinalInstrBuf, pTb);
10417 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10418
10419 /*
10420 * Apply fixups.
10421 */
10422 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10423 uint32_t const cFixups = pReNative->cFixups;
10424 for (uint32_t i = 0; i < cFixups; i++)
10425 {
10426 Assert(paFixups[i].off < off);
10427 Assert(paFixups[i].idxLabel < cLabels);
10428 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10429 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10430 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10431 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10432 switch (paFixups[i].enmType)
10433 {
10434#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10435 case kIemNativeFixupType_Rel32:
10436 Assert(paFixups[i].off + 4 <= off);
10437 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10438 continue;
10439
10440#elif defined(RT_ARCH_ARM64)
10441 case kIemNativeFixupType_RelImm26At0:
10442 {
10443 Assert(paFixups[i].off < off);
10444 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10445 Assert(offDisp >= -33554432 && offDisp < 33554432);
10446 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10447 continue;
10448 }
10449
10450 case kIemNativeFixupType_RelImm19At5:
10451 {
10452 Assert(paFixups[i].off < off);
10453 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10454 Assert(offDisp >= -262144 && offDisp < 262144);
10455 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10456 continue;
10457 }
10458
10459 case kIemNativeFixupType_RelImm14At5:
10460 {
10461 Assert(paFixups[i].off < off);
10462 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10463 Assert(offDisp >= -8192 && offDisp < 8192);
10464 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10465 continue;
10466 }
10467
10468#endif
10469 case kIemNativeFixupType_Invalid:
10470 case kIemNativeFixupType_End:
10471 break;
10472 }
10473 AssertFailed();
10474 }
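 /* Encoding notes for the fixups above (sketch): offsets are in IEMNATIVEINSTR
    units, so on ARM64 the displacements are instruction counts and RelImm26At0
    (B/BL) covers +/-2^25 instructions, i.e. +/-128 MiB, RelImm19At5
    (B.cond/CBZ/CBNZ) +/-1 MiB and RelImm14At5 (TBZ/TBNZ) +/-32 KiB; only the
    immediate bits get patched, the opcode bits are kept by the masks. The
    AMD64/X86 Rel32 case works on byte offsets, with offAddend supplied by
    whoever recorded the fixup. */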
10475
10476 /*
10477 * Apply TB exit fixups.
10478 */
10479 PIEMNATIVEEXITFIXUP const paTbExitFixups = pReNative->paTbExitFixups;
10480 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
10481 for (uint32_t i = 0; i < cTbExitFixups; i++)
10482 {
10483 Assert(paTbExitFixups[i].off < off);
10484 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(paTbExitFixups[i].enmExitReason));
10485 RTPTRUNION const Ptr = { &paFinalInstrBuf[paTbExitFixups[i].off] };
10486
10487#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10488 Assert(paTbExitFixups[i].off + 4 <= off);
10489 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off + 4];
10490 Assert(offDisp >= INT32_MIN && offDisp <= INT32_MAX);
10491 *Ptr.pi32 = (int32_t)offDisp;
10492
10493#elif defined(RT_ARCH_ARM64)
10494 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off];
10495 Assert(offDisp >= -33554432 && offDisp < 33554432);
10496 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10497
10498#else
10499# error "Port me!"
10500#endif
10501 }
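 /* Unlike the label fixups above, these branches target code outside the TB:
    the per-chunk common code set up by iemNativeRecompileAttachExecMemChunkCtx,
    which is why the displacement is computed against the RX mapping
    (paFinalInstrBufRx) and the chunk context's apExitLabels table. */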
10502
10503 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
10504 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10505
10506 /*
10507 * Convert the translation block.
10508 */
10509 RTMemFree(pTb->Thrd.paCalls);
10510 pTb->Native.paInstructions = paFinalInstrBufRx;
10511 pTb->Native.cInstructions = off;
10512 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10513#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10514 pReNative->pDbgInfo->FlatPc = pTb->FlatPc;
10515 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10516 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10517#endif
10518
10519 Assert(pTbAllocator->cThreadedTbs > 0);
10520 pTbAllocator->cThreadedTbs -= 1;
10521 pTbAllocator->cNativeTbs += 1;
10522 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10523
10524#ifdef LOG_ENABLED
10525 /*
10526 * Disassemble to the log if enabled.
10527 */
10528 if (LogIs3Enabled())
10529 {
10530 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10531 iemNativeDisassembleTb(pVCpu, pTb, DBGFR3InfoLogHlp());
10532# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10533 RTLogFlush(NULL);
10534# endif
10535 }
10536#endif
10537 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10538
10539 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10540 return pTb;
10541}
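/* Informal summary of the flow above: optional backward liveness analysis,
   per-call native emission via g_apfnIemNativeRecompileFunctions (falling back
   to iemNativeEmitThreadedCall), label and TB-exit fixup application,
   publishing via iemExecMemAllocatorReadyForUse, and finally flipping pTb over
   to IEMTB_F_TYPE_NATIVE. */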
10542