VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@106319

Last change on this file was 106319, checked in by vboxsync, 6 weeks ago

VMM/IEM: Reduced parameter count for iemNativeRegAllocTmp*. bugref:10720

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 455.5 KB
1/* $Id: IEMAllN8veRecompiler.cpp 106319 2024-10-15 08:50:24Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : Delayed PC updating.
11 * - Level 5 (Log5) : Postponed and skipped EFLAGS calculations.
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
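 *
 * Note: Assuming the usual lower-cased log group naming, these levels can be
 *       enabled at runtime with something like VBOX_LOG=+iem_re_native.e.l3.l12
 *       (level 3 disassembly plus level 12 register allocator details).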
19 */
20
21/*
22 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include <VBox/vmm/tm.h>
55#include "IEMInternal.h"
56#include <VBox/vmm/vmcc.h>
57#include <VBox/log.h>
58#include <VBox/err.h>
59#include <VBox/dis.h>
60#include <VBox/param.h>
61#include <iprt/assert.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
71# include "/opt/local/include/capstone/capstone.h"
72#endif
73
74#include "IEMInline.h"
75#include "IEMThreadedFunctions.h"
76#include "IEMN8veRecompiler.h"
77#include "IEMN8veRecompilerEmit.h"
78#include "IEMN8veRecompilerTlbLookup.h"
79#include "IEMNativeFunctions.h"
80#include "target-x86/IEMAllN8veEmit-x86.h"
81
82
83/*
84 * Narrow down configs here to avoid wasting time on unused configs.
85 * Note! Same checks in IEMAllThrdRecompiler.cpp.
86 */
87
88#ifndef IEM_WITH_CODE_TLB
89# error The code TLB must be enabled for the recompiler.
90#endif
91
92#ifndef IEM_WITH_DATA_TLB
93# error The data TLB must be enabled for the recompiler.
94#endif
95
96#ifndef IEM_WITH_SETJMP
97# error The setjmp approach must be enabled for the recompiler.
98#endif
99
100/** @todo eliminate this clang build hack. */
101#if RT_CLANG_PREREQ(4, 0)
102# pragma GCC diagnostic ignored "-Wunused-function"
103#endif
104
105
106/*********************************************************************************************************************************
107* Internal Functions *
108*********************************************************************************************************************************/
109#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
110static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
111#endif
112DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
113DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
114 IEMNATIVEGSTREG enmGstReg, uint32_t off);
115DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
116static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode = false);
117
118
119
120/*********************************************************************************************************************************
121* Native Recompilation *
122*********************************************************************************************************************************/
123
124
125/**
126 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
127 */
128IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
129{
130 pVCpu->iem.s.cInstructions += idxInstr;
131 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
132}
133
134
135/**
136 * Helper for iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb.
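 *
 * Returns true when an IRQ or force flag needs servicing.  APIC/PIC interrupts
 * only count when RFLAGS.IF is set and no interrupt shadow is active, and a few
 * housekeeping flags (CR3 sync, TLB flush, unhalt) are ignored here.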
137 */
138DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
139{
140 uint64_t fCpu = pVCpu->fLocalForcedActions;
141 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
142 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
143 | VMCPU_FF_TLB_FLUSH
144 | VMCPU_FF_UNHALT );
145 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
146 if (RT_LIKELY( ( !fCpu
147 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
148 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
149 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
150 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
151 return false;
152 return true;
153}
154
155
156/**
157 * Used by TB code to try to link directly to the next TB via the TB lookup table when the physical PC is already known (passed in as GCPhysPc).
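 *
 * On a successful link this returns the address of the next TB's native code to
 * jump to; on a missing TB, a flags/GCPhysPc mismatch, a pending IRQ/FF or a
 * timer-poll break it primes ppTbLookupEntryR3 and returns 0, which the emitted
 * code treats as "return to the execution loop".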
158 */
159template <bool const a_fWithIrqCheck>
160IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
161 uint32_t fFlags, RTGCPHYS GCPhysPc))
162{
163 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
164 Assert(idxTbLookup < pTb->cTbLookupEntries);
165 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
166#if 1
167 PIEMTB const pNewTb = *ppNewTb;
168 if (pNewTb)
169 {
170# ifdef VBOX_STRICT
171 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
172 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
173 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
174 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
175 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
176 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
177# endif
178 if (pNewTb->GCPhysPc == GCPhysPc)
179 {
180# ifdef VBOX_STRICT
181 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
182 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
183 fAssertFlags |= IEMTB_F_INHIBIT_SHADOW;
184 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
185 fAssertFlags |= IEMTB_F_INHIBIT_NMI;
186# if 1 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. */
187 Assert(IEM_F_MODE_X86_IS_FLAT(fFlags));
188# else
189 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
190 {
191 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
192 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
193 fAssertFlags |= IEMTB_F_CS_LIM_CHECKS;
194 }
195# endif
196 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
197 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
198 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
199#endif
200
201 /*
202 * Check them + type.
203 */
204 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
205 {
206 /*
207 * Check for interrupts and stuff.
208 */
209 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
210 * The main problems are the statistics and, to some degree, the logging. :/ */
211 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
212 {
213 /* Do polling. */
214 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
215 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
216 {
217 /*
218 * Success. Update statistics and switch to the next TB.
219 */
220 if (a_fWithIrqCheck)
221 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
222 else
223 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
224
225 pNewTb->cUsed += 1;
226 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
227 pVCpu->iem.s.pCurTbR3 = pNewTb;
228 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
229 pVCpu->iem.s.cTbExecNative += 1;
230 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
231 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
232 return (uintptr_t)pNewTb->Native.paInstructions;
233 }
234 }
235 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
236 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
237 }
238 else
239 {
240 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
241 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
242 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
243 }
244 }
245 else
246 {
247 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
248 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
249 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
250 }
251 }
252 else
253 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
254#else
255 NOREF(GCPhysPc);
256#endif
257
258 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
259 return 0;
260}
261
262
263/**
264 * Used by TB code to try to link directly to the next TB via the TB lookup table, computing the flags and physical PC itself.
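 *
 * Same as iemNativeHlpReturnBreakViaLookup, except the TB flags and the
 * physical PC are derived here (via the code TLB / opcode fetch path) instead
 * of being supplied by the caller.  Returns the next TB's native code address
 * on success, or 0 to fall back to the execution loop.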
265 */
266template <bool const a_fWithIrqCheck>
267IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
268{
269 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
270 Assert(idxTbLookup < pTb->cTbLookupEntries);
271 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
272#if 1
273 PIEMTB const pNewTb = *ppNewTb;
274 if (pNewTb)
275 {
276 /*
277 * Calculate the flags for the next TB and check if they match.
278 */
279 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
280 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
281 { /* likely */ }
282 else
283 {
284 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
285 fFlags |= IEMTB_F_INHIBIT_SHADOW;
286 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
287 fFlags |= IEMTB_F_INHIBIT_NMI;
288 }
289 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
290 {
291 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
292 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
293 { /* likely */ }
294 else
295 fFlags |= IEMTB_F_CS_LIM_CHECKS;
296 }
297 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
298
299 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
300 {
301 /*
302 * Do the TLB lookup for flat RIP and compare the result with the next TB.
303 *
304 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
305 */
306 /* Calc the effective PC. */
307 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
308 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
309 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
310
311 /* Advance within the current buffer (PAGE) when possible. */
312 RTGCPHYS GCPhysPc;
313 uint64_t off;
314 if ( pVCpu->iem.s.pbInstrBuf
315 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
316 {
317 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
318 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
319 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
320 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
321 else
322 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
323 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
324 }
325 else
326 {
327 pVCpu->iem.s.pbInstrBuf = NULL;
328 pVCpu->iem.s.offCurInstrStart = 0;
329 pVCpu->iem.s.offInstrNextByte = 0;
330 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
331 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
332 }
333
334 if (pNewTb->GCPhysPc == GCPhysPc)
335 {
336 /*
337 * Check for interrupts and stuff.
338 */
339 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
340 * The main problems are the statistics and, to some degree, the logging. :/ */
341 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
342 {
343 /* Do polling. */
344 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
345 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
346 {
347 /*
348 * Success. Update statistics and switch to the next TB.
349 */
350 if (a_fWithIrqCheck)
351 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
352 else
353 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
354
355 pNewTb->cUsed += 1;
356 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
357 pVCpu->iem.s.pCurTbR3 = pNewTb;
358 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
359 pVCpu->iem.s.cTbExecNative += 1;
360 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
361 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
362 return (uintptr_t)pNewTb->Native.paInstructions;
363 }
364 }
365 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
366 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
367 }
368 else
369 {
370 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
371 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
372 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
373 }
374 }
375 else
376 {
377 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
378 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
379 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
380 }
381 }
382 else
383 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
384#else
385 NOREF(fFlags);
386 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
387#endif
388
389 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
390 return 0;
391}
392
393
394/**
395 * Used by TB code when it wants to raise a \#DE.
396 */
397IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
398{
399 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
400 iemRaiseDivideErrorJmp(pVCpu);
401#ifndef _MSC_VER
402 return VINF_IEM_RAISED_XCPT; /* not reached */
403#endif
404}
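/* Note: iemRaiseDivideErrorJmp and the other iemRaiseXxxJmp workers used by this
   and the following helpers longjmp out rather than return, so the trailing
   return statement is never reached.  It is compiled out for MSC, presumably
   because that compiler would flag it as unreachable, while other compilers are
   happier with an explicit return value. */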
405
406
407/**
408 * Used by TB code when it wants to raise a \#UD.
409 */
410IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
411{
412 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
413 iemRaiseUndefinedOpcodeJmp(pVCpu);
414#ifndef _MSC_VER
415 return VINF_IEM_RAISED_XCPT; /* not reached */
416#endif
417}
418
419
420/**
421 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
422 *
423 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
424 */
425IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
426{
427 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
428 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
429 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
430 iemRaiseUndefinedOpcodeJmp(pVCpu);
431 else
432 iemRaiseDeviceNotAvailableJmp(pVCpu);
433#ifndef _MSC_VER
434 return VINF_IEM_RAISED_XCPT; /* not reached */
435#endif
436}
437
438
439/**
440 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
441 *
442 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
443 */
444IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
445{
446 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
447 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
448 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
449 iemRaiseUndefinedOpcodeJmp(pVCpu);
450 else
451 iemRaiseDeviceNotAvailableJmp(pVCpu);
452#ifndef _MSC_VER
453 return VINF_IEM_RAISED_XCPT; /* not reached */
454#endif
455}
456
457
458/**
459 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
460 *
461 * See IEM_MC_CALL_AVX_XXX/IEM_MC_CALL_SSE_XXX.
462 */
463IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
464{
465 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
466 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
467 iemRaiseSimdFpExceptionJmp(pVCpu);
468 else
469 iemRaiseUndefinedOpcodeJmp(pVCpu);
470#ifndef _MSC_VER
471 return VINF_IEM_RAISED_XCPT; /* not reached */
472#endif
473}
474
475
476/**
477 * Used by TB code when it wants to raise a \#NM.
478 */
479IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
480{
481 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
482 iemRaiseDeviceNotAvailableJmp(pVCpu);
483#ifndef _MSC_VER
484 return VINF_IEM_RAISED_XCPT; /* not reached */
485#endif
486}
487
488
489/**
490 * Used by TB code when it wants to raise a \#GP(0).
491 */
492IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
493{
494 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
495 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
496#ifndef _MSC_VER
497 return VINF_IEM_RAISED_XCPT; /* not reached */
498#endif
499}
500
501
502/**
503 * Used by TB code when it wants to raise a \#MF.
504 */
505IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
506{
507 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
508 iemRaiseMathFaultJmp(pVCpu);
509#ifndef _MSC_VER
510 return VINF_IEM_RAISED_XCPT; /* not reached */
511#endif
512}
513
514
515/**
516 * Used by TB code when it wants to raise a \#XF.
517 */
518IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
519{
520 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
521 iemRaiseSimdFpExceptionJmp(pVCpu);
522#ifndef _MSC_VER
523 return VINF_IEM_RAISED_XCPT; /* not reached */
524#endif
525}
526
527
528/**
529 * Used by TB code when detecting opcode changes.
530 * @see iemThreadeFuncWorkerObsoleteTb
531 */
532IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
533{
534 /* We set fSafeToFree to false as we're being called in the context
535 of a TB callback function, which for native TBs means we cannot release
536 the executable memory till we've returned our way back to iemTbExec,
537 as that return path goes via the native code generated for the TB. */
538 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
539 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
540 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
541 return VINF_IEM_REEXEC_BREAK;
542}
543
544
545/**
546 * Used by TB code when we need to switch to a TB with CS.LIM checking.
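 *
 * Returning VINF_IEM_REEXEC_BREAK makes the generated code leave the current TB
 * so execution can continue with a TB variant that includes CS limit checks
 * (presumably one compiled with IEMTB_F_CS_LIM_CHECKS set).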
547 */
548IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
549{
550 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
551 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
552 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
553 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
554 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
555 return VINF_IEM_REEXEC_BREAK;
556}
557
558
559/**
560 * Used by TB code when we missed a PC check after a branch.
561 */
562IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
563{
564 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
565 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
566 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
567 pVCpu->iem.s.pbInstrBuf));
568 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
569 return VINF_IEM_REEXEC_BREAK;
570}
571
572
573
574/*********************************************************************************************************************************
575* Helpers: Segmented memory fetches and stores. *
576*********************************************************************************************************************************/
577
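/*
 * A rough sketch of how these helpers fit in: when IEMNATIVE_WITH_TLB_LOOKUP_FETCH
 * is defined, the TB code inlines the data TLB lookup and only calls these
 * out-of-line helpers on a TLB miss, hence the *SafeJmp workers; otherwise the
 * helpers do the full segmented access via the regular *Jmp workers.  Return
 * values are widened to 64 bits to keep the generated assembly simple.
 */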
578/**
579 * Used by TB code to load unsigned 8-bit data w/ segmentation.
580 */
581IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
582{
583#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
584 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
585#else
586 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
587#endif
588}
589
590
591/**
592 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
593 * to 16 bits.
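 *
 * The cast chain below sign-extends to 16 bits first and then widens to 64 bits
 * without further sign extension; e.g. a fetched byte 0x80 comes back as
 * 0x000000000000ff80.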
594 */
595IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
596{
597#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
598 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
599#else
600 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
601#endif
602}
603
604
605/**
606 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
607 * to 32 bits.
608 */
609IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
610{
611#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
612 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
613#else
614 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
615#endif
616}
617
618/**
619 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
620 * to 64 bits.
621 */
622IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
623{
624#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
625 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
626#else
627 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
628#endif
629}
630
631
632/**
633 * Used by TB code to load unsigned 16-bit data w/ segmentation.
634 */
635IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
636{
637#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
638 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
639#else
640 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
641#endif
642}
643
644
645/**
646 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
647 * to 32 bits.
648 */
649IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
650{
651#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
652 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
653#else
654 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
655#endif
656}
657
658
659/**
660 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
661 * to 64 bits.
662 */
663IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
664{
665#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
666 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
667#else
668 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
669#endif
670}
671
672
673/**
674 * Used by TB code to load unsigned 32-bit data w/ segmentation.
675 */
676IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
677{
678#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
679 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
680#else
681 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
682#endif
683}
684
685
686/**
687 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
688 * to 64 bits.
689 */
690IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
691{
692#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
693 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
694#else
695 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
696#endif
697}
698
699
700/**
701 * Used by TB code to load unsigned 64-bit data w/ segmentation.
702 */
703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
704{
705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
706 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
707#else
708 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
709#endif
710}
711
712
713#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
714/**
715 * Used by TB code to load 128-bit data w/ segmentation.
716 */
717IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
718{
719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
720 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
721#else
722 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
723#endif
724}
725
726
727/**
728 * Used by TB code to load 128-bit data w/ segmentation and SSE alignment checking.
729 */
730IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
731{
732#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
733 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
734#else
735 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
736#endif
737}
738
739
740/**
741 * Used by TB code to load 128-bit data w/ segmentation, without alignment checking (NoAc).
742 */
743IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
744{
745#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
746 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
747#else
748 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
749#endif
750}
751
752
753/**
754 * Used by TB code to load 256-bit data w/ segmentation, without alignment checking (NoAc).
755 */
756IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
757{
758#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
759 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
760#else
761 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
762#endif
763}
764
765
766/**
767 * Used by TB code to load 256-bit data w/ segmentation and AVX alignment checking.
768 */
769IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
770{
771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
772 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
773#else
774 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
775#endif
776}
777#endif
778
779
780/**
781 * Used by TB code to store unsigned 8-bit data w/ segmentation.
782 */
783IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
784{
785#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
786 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
787#else
788 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
789#endif
790}
791
792
793/**
794 * Used by TB code to store unsigned 16-bit data w/ segmentation.
795 */
796IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
797{
798#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
799 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
800#else
801 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
802#endif
803}
804
805
806/**
807 * Used by TB code to store unsigned 32-bit data w/ segmentation.
808 */
809IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
810{
811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
812 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
813#else
814 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
815#endif
816}
817
818
819/**
820 * Used by TB code to store unsigned 64-bit data w/ segmentation.
821 */
822IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
823{
824#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
825 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
826#else
827 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
828#endif
829}
830
831
832#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
833/**
834 * Used by TB code to store unsigned 128-bit data w/ segmentation and SSE alignment checking.
835 */
836IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
837{
838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
839 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
840#else
841 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
842#endif
843}
844
845
846/**
847 * Used by TB code to store unsigned 128-bit data w/ segmentation, without alignment checking (NoAc).
848 */
849IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
850{
851#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
852 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
853#else
854 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
855#endif
856}
857
858
859/**
860 * Used by TB code to store unsigned 256-bit data w/ segmentation, without alignment checking (NoAc).
861 */
862IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
863{
864#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
865 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
866#else
867 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
868#endif
869}
870
871
872/**
873 * Used by TB code to store unsigned 256-bit data w/ segmentation and AVX alignment checking.
874 */
875IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
876{
877#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
878 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
879#else
880 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
881#endif
882}
883#endif
884
885
886
887/**
888 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
889 */
890IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
891{
892#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
893 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
894#else
895 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
896#endif
897}
898
899
900/**
901 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
902 */
903IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
904{
905#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
906 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
907#else
908 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
909#endif
910}
911
912
913/**
914 * Used by TB code to store a 32-bit selector value onto a generic stack.
915 *
916 * Intel CPUs don't write a whole dword, thus the special function.
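 * E.g. a 32-bit 'PUSH FS' on such CPUs only updates the low word of the dword
 * stack slot, leaving the upper half untouched, which is why the plain U32
 * store helper cannot be used here.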
917 */
918IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
919{
920#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
921 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
922#else
923 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
924#endif
925}
926
927
928/**
929 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
930 */
931IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
932{
933#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
934 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
935#else
936 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
937#endif
938}
939
940
941/**
942 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
943 */
944IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
945{
946#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
947 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
948#else
949 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
950#endif
951}
952
953
954/**
955 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
956 */
957IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
958{
959#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
960 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
961#else
962 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
963#endif
964}
965
966
967/**
968 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
969 */
970IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
971{
972#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
973 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
974#else
975 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
976#endif
977}
978
979
980
981/*********************************************************************************************************************************
982* Helpers: Flat memory fetches and stores. *
983*********************************************************************************************************************************/
984
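/*
 * Note: in the TLB-lookup configuration these reuse the segmented *SafeJmp
 * workers and pass UINT8_MAX as iSegReg, which IEM appears to use as the
 * "no segment, GCPtrMem is already a flat (linear) address" marker.
 */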
985/**
986 * Used by TB code to load unsigned 8-bit data w/ flat address.
987 * @note Zero extending the value to 64-bit to simplify assembly.
988 */
989IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
990{
991#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
992 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
993#else
994 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
995#endif
996}
997
998
999/**
1000 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1001 * to 16 bits.
1002 * @note Zero extending the value to 64-bit to simplify assembly.
1003 */
1004IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1005{
1006#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1007 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1008#else
1009 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1010#endif
1011}
1012
1013
1014/**
1015 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1016 * to 32 bits.
1017 * @note Zero extending the value to 64-bit to simplify assembly.
1018 */
1019IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1020{
1021#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1022 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1023#else
1024 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1025#endif
1026}
1027
1028
1029/**
1030 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1031 * to 64 bits.
1032 */
1033IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1034{
1035#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1036 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1037#else
1038 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1039#endif
1040}
1041
1042
1043/**
1044 * Used by TB code to load unsigned 16-bit data w/ flat address.
1045 * @note Zero extending the value to 64-bit to simplify assembly.
1046 */
1047IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1048{
1049#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1050 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1051#else
1052 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1053#endif
1054}
1055
1056
1057/**
1058 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1059 * to 32 bits.
1060 * @note Zero extending the value to 64-bit to simplify assembly.
1061 */
1062IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1063{
1064#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1065 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1066#else
1067 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1068#endif
1069}
1070
1071
1072/**
1073 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1074 * to 64 bits.
1075 * @note Zero extending the value to 64-bit to simplify assembly.
1076 */
1077IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1078{
1079#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1080 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1081#else
1082 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1083#endif
1084}
1085
1086
1087/**
1088 * Used by TB code to load unsigned 32-bit data w/ flat address.
1089 * @note Zero extending the value to 64-bit to simplify assembly.
1090 */
1091IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1092{
1093#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1094 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1095#else
1096 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1097#endif
1098}
1099
1100
1101/**
1102 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1103 * to 64 bits.
1104 * @note Zero extending the value to 64-bit to simplify assembly.
1105 */
1106IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1107{
1108#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1109 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1110#else
1111 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1112#endif
1113}
1114
1115
1116/**
1117 * Used by TB code to load unsigned 64-bit data w/ flat address.
1118 */
1119IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1120{
1121#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1122 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1123#else
1124 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1125#endif
1126}
1127
1128
1129#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1130/**
1131 * Used by TB code to load unsigned 128-bit data w/ flat address.
1132 */
1133IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1134{
1135#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1136 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1137#else
1138 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1139#endif
1140}
1141
1142
1143/**
1144 * Used by TB code to load unsigned 128-bit data w/ flat address and SSE alignment checking.
1145 */
1146IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1147{
1148#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1149 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1150#else
1151 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1152#endif
1153}
1154
1155
1156/**
1157 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checking (NoAc).
1158 */
1159IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1160{
1161#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1162 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1163#else
1164 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1165#endif
1166}
1167
1168
1169/**
1170 * Used by TB code to load unsigned 256-bit data w/ flat address, without alignment checking (NoAc).
1171 */
1172IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1173{
1174#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1175 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1176#else
1177 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1178#endif
1179}
1180
1181
1182/**
1183 * Used by TB code to load unsigned 256-bit data w/ flat address and AVX alignment checking.
1184 */
1185IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1186{
1187#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1188 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1189#else
1190 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1191#endif
1192}
1193#endif
1194
1195
1196/**
1197 * Used by TB code to store unsigned 8-bit data w/ flat address.
1198 */
1199IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1200{
1201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1202 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1203#else
1204 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1205#endif
1206}
1207
1208
1209/**
1210 * Used by TB code to store unsigned 16-bit data w/ flat address.
1211 */
1212IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1213{
1214#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1215 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1216#else
1217 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1218#endif
1219}
1220
1221
1222/**
1223 * Used by TB code to store unsigned 32-bit data w/ flat address.
1224 */
1225IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1226{
1227#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1228 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1229#else
1230 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1231#endif
1232}
1233
1234
1235/**
1236 * Used by TB code to store unsigned 64-bit data w/ flat address.
1237 */
1238IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1239{
1240#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1241 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1242#else
1243 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1244#endif
1245}
1246
1247
1248#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1249/**
1250 * Used by TB code to store unsigned 128-bit data w/ flat address and SSE alignment checking.
1251 */
1252IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1253{
1254#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1255 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1256#else
1257 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1258#endif
1259}
1260
1261
1262/**
1263 * Used by TB code to store unsigned 128-bit data w/ flat address, without alignment checking (NoAc).
1264 */
1265IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1266{
1267#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1268 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1269#else
1270 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1271#endif
1272}
1273
1274
1275/**
1276 * Used by TB code to store unsigned 256-bit data w/ flat address, without alignment checking (NoAc).
1277 */
1278IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1279{
1280#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1281 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1282#else
1283 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1284#endif
1285}
1286
1287
1288/**
1289 * Used by TB code to store unsigned 256-bit data w/ flat address and AVX alignment checking.
1290 */
1291IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1292{
1293#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1294 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1295#else
1296 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1297#endif
1298}
1299#endif
1300
1301
1302
1303/**
1304 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1305 */
1306IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1307{
1308#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1309 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1310#else
1311 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1312#endif
1313}
1314
1315
1316/**
1317 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1318 */
1319IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1320{
1321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1322 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1323#else
1324 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1325#endif
1326}
1327
1328
1329/**
1330 * Used by TB code to store a segment selector value onto a flat stack.
1331 *
1332 * Intel CPUs don't write a whole dword, thus the special function.
1333 */
1334IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1335{
1336#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1337 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1338#else
1339 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1340#endif
1341}
1342
1343
1344/**
1345 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1346 */
1347IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1348{
1349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1350 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1351#else
1352 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1353#endif
1354}
1355
1356
1357/**
1358 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1359 */
1360IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1361{
1362#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1363 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1364#else
1365 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1366#endif
1367}
1368
1369
1370/**
1371 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1372 */
1373IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1374{
1375#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1376 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1377#else
1378 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1379#endif
1380}
1381
1382
1383/**
1384 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1385 */
1386IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1387{
1388#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1389 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1390#else
1391 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1392#endif
1393}
1394
1395
1396
1397/*********************************************************************************************************************************
1398* Helpers: Segmented memory mapping. *
1399*********************************************************************************************************************************/
1400
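/*
 * Note: each mapping helper hands back a host pointer to the guest data and
 * fills *pbUnmapInfo with a cookie that the TB code later passes to the
 * matching commit-and-unmap helper; the Atomic/Rw/Wo/Ro suffixes select the
 * access mode requested from the memory subsystem.
 */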
1401/**
1402 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1403 * segmentation.
1404 */
1405IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1406 RTGCPTR GCPtrMem, uint8_t iSegReg))
1407{
1408#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1409 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1410#else
1411 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1412#endif
1413}
1414
1415
1416/**
1417 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1418 */
1419IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1420 RTGCPTR GCPtrMem, uint8_t iSegReg))
1421{
1422#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1423 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1424#else
1425 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1426#endif
1427}
1428
1429
1430/**
1431 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1432 */
1433IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1434 RTGCPTR GCPtrMem, uint8_t iSegReg))
1435{
1436#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1437 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1438#else
1439 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1440#endif
1441}
1442
1443
1444/**
1445 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1446 */
1447IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1448 RTGCPTR GCPtrMem, uint8_t iSegReg))
1449{
1450#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1451 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1452#else
1453 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1454#endif
1455}
1456
1457
1458/**
1459 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1460 * segmentation.
1461 */
1462IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1463 RTGCPTR GCPtrMem, uint8_t iSegReg))
1464{
1465#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1466 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1467#else
1468 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1469#endif
1470}
1471
1472
1473/**
1474 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1475 */
1476IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1477 RTGCPTR GCPtrMem, uint8_t iSegReg))
1478{
1479#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1480 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1481#else
1482 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1483#endif
1484}
1485
1486
1487/**
1488 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1489 */
1490IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1491 RTGCPTR GCPtrMem, uint8_t iSegReg))
1492{
1493#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1494 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1495#else
1496 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1497#endif
1498}
1499
1500
1501/**
1502 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1503 */
1504IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1505 RTGCPTR GCPtrMem, uint8_t iSegReg))
1506{
1507#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1508 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1509#else
1510 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1511#endif
1512}
1513
1514
1515/**
1516 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1517 * segmentation.
1518 */
1519IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1520 RTGCPTR GCPtrMem, uint8_t iSegReg))
1521{
1522#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1523 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1524#else
1525 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1526#endif
1527}
1528
1529
1530/**
1531 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1532 */
1533IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1534 RTGCPTR GCPtrMem, uint8_t iSegReg))
1535{
1536#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1537 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1538#else
1539 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1540#endif
1541}
1542
1543
1544/**
1545 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1546 */
1547IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1548 RTGCPTR GCPtrMem, uint8_t iSegReg))
1549{
1550#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1551 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1552#else
1553 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1554#endif
1555}
1556
1557
1558/**
1559 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1560 */
1561IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1562 RTGCPTR GCPtrMem, uint8_t iSegReg))
1563{
1564#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1565 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1566#else
1567 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1568#endif
1569}
1570
1571
1572/**
1573 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1574 * segmentation.
1575 */
1576IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1577 RTGCPTR GCPtrMem, uint8_t iSegReg))
1578{
1579#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1580 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1581#else
1582 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1583#endif
1584}
1585
1586
1587/**
1588 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1591 RTGCPTR GCPtrMem, uint8_t iSegReg))
1592{
1593#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1594 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1595#else
1596 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1597#endif
1598}
1599
1600
1601/**
1602 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1603 */
1604IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1605 RTGCPTR GCPtrMem, uint8_t iSegReg))
1606{
1607#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1608 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1609#else
1610 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1611#endif
1612}
1613
1614
1615/**
1616 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1619 RTGCPTR GCPtrMem, uint8_t iSegReg))
1620{
1621#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1622 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1623#else
1624 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1625#endif
1626}
1627
1628
1629/**
1630 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1633 RTGCPTR GCPtrMem, uint8_t iSegReg))
1634{
1635#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1636 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1637#else
1638 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1639#endif
1640}
1641
1642
1643/**
1644 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1645 */
1646IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1647 RTGCPTR GCPtrMem, uint8_t iSegReg))
1648{
1649#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1650 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1651#else
1652 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1653#endif
1654}
1655
1656
1657/**
1658 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1659 * segmentation.
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1662 RTGCPTR GCPtrMem, uint8_t iSegReg))
1663{
1664#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1665 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1666#else
1667 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1668#endif
1669}
1670
1671
1672/**
1673 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1674 */
1675IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1676 RTGCPTR GCPtrMem, uint8_t iSegReg))
1677{
1678#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1679 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1680#else
1681 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1682#endif
1683}
1684
1685
1686/**
1687 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1688 */
1689IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1690 RTGCPTR GCPtrMem, uint8_t iSegReg))
1691{
1692#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1693 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1694#else
1695 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1696#endif
1697}
1698
1699
1700/**
1701 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1702 */
1703IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1704 RTGCPTR GCPtrMem, uint8_t iSegReg))
1705{
1706#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1707 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1708#else
1709 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1710#endif
1711}
1712
1713
1714/*********************************************************************************************************************************
1715* Helpers: Flat memory mapping. *
1716*********************************************************************************************************************************/
1717
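/*
 * Note: with IEMNATIVE_WITH_TLB_LOOKUP_MAPPED the flat helpers below reuse the
 * segmented "safe" workers and pass UINT8_MAX as the iSegReg argument (presumably
 * the flat-address marker); only the non-mapped configuration calls the dedicated
 * iemMemFlatMapData*Jmp variants.
 */
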
1718/**
1719 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1720 * address.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1723{
1724#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1725 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1726#else
1727 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1728#endif
1729}
1730
1731
1732/**
1733 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1734 */
1735IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1736{
1737#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1738 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1739#else
1740 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1741#endif
1742}
1743
1744
1745/**
1746 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1747 */
1748IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1749{
1750#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1751 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1752#else
1753 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1754#endif
1755}
1756
1757
1758/**
1759 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1760 */
1761IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1762{
1763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1764 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1765#else
1766 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1767#endif
1768}
1769
1770
1771/**
1772 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1773 * address.
1774 */
1775IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1776{
1777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1778 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1779#else
1780 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1781#endif
1782}
1783
1784
1785/**
1786 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1787 */
1788IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1789{
1790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1791 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1792#else
1793 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1794#endif
1795}
1796
1797
1798/**
1799 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1800 */
1801IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1802{
1803#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1804 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1805#else
1806 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1807#endif
1808}
1809
1810
1811/**
1812 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1813 */
1814IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1815{
1816#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1817 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1818#else
1819 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1820#endif
1821}
1822
1823
1824/**
1825 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1826 * address.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1829{
1830#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1831 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1832#else
1833 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1834#endif
1835}
1836
1837
1838/**
1839 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1840 */
1841IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1842{
1843#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1844 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1845#else
1846 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1847#endif
1848}
1849
1850
1851/**
1852 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1853 */
1854IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1855{
1856#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1857 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1858#else
1859 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1860#endif
1861}
1862
1863
1864/**
1865 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1870 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1871#else
1872 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1873#endif
1874}
1875
1876
1877/**
1878 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1879 * address.
1880 */
1881IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1882{
1883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1884 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1885#else
1886 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1887#endif
1888}
1889
1890
1891/**
1892 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1895{
1896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1897 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1898#else
1899 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1900#endif
1901}
1902
1903
1904/**
1905 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1906 */
1907IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1908{
1909#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1910 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1911#else
1912 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1913#endif
1914}
1915
1916
1917/**
1918 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1919 */
1920IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1921{
1922#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1923 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1924#else
1925 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1926#endif
1927}
1928
1929
1930/**
1931 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1932 */
1933IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1934{
1935#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1936 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1937#else
1938 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1939#endif
1940}
1941
1942
1943/**
1944 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1945 */
1946IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1947{
1948#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1949 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1950#else
1951 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1952#endif
1953}
1954
1955
1956/**
1957 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1958 * address.
1959 */
1960IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1961{
1962#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1963 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1964#else
1965 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1966#endif
1967}
1968
1969
1970/**
1971 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1974{
1975#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1976 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1977#else
1978 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1979#endif
1980}
1981
1982
1983/**
1984 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1985 */
1986IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1987{
1988#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1989 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1990#else
1991 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1992#endif
1993}
1994
1995
1996/**
1997 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1998 */
1999IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2000{
2001#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2002 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2003#else
2004 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2005#endif
2006}
2007
2008
2009/*********************************************************************************************************************************
2010* Helpers: Commit, rollback & unmap *
2011*********************************************************************************************************************************/
2012
2013/**
2014 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2015 */
2016IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2017{
2018 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2019}
2020
2021
2022/**
2023 * Used by TB code to commit and unmap a read-write memory mapping.
2024 */
2025IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2026{
2027 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2028}
2029
2030
2031/**
2032 * Used by TB code to commit and unmap a write-only memory mapping.
2033 */
2034IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2035{
2036 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2037}
2038
2039
2040/**
2041 * Used by TB code to commit and unmap a read-only memory mapping.
2042 */
2043IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2044{
2045 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2046}
2047
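/*
 * Illustrative sketch (assumed typical usage, not code emitted verbatim): TB code
 * pairs one of the map helpers above with the matching commit-and-unmap helper,
 * threading the unmap info byte between the two calls.  The locals below are
 * hypothetical stand-ins for values that really live in host registers:
 *
 *      uint8_t   bUnmapInfo = 0;
 *      uint32_t *pu32Dst    = iemNativeHlpMemFlatMapDataU32Wo(pVCpu, &bUnmapInfo, GCPtrMem);
 *      *pu32Dst             = u32Value;                        // the guest store itself
 *      iemNativeHlpMemCommitAndUnmapWo(pVCpu, bUnmapInfo);     // commit & release the mapping
 */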
2048
2049/**
2050 * Reinitializes the native recompiler state.
2051 *
2052 * Called before starting a new recompile job.
2053 */
2054static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2055{
2056 pReNative->cLabels = 0;
2057 pReNative->bmLabelTypes = 0;
2058 pReNative->cFixups = 0;
2059 pReNative->cTbExitFixups = 0;
2060#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2061 pReNative->pDbgInfo->cEntries = 0;
2062 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2063#endif
2064 pReNative->pTbOrg = pTb;
2065 pReNative->cCondDepth = 0;
2066 pReNative->uCondSeqNo = 0;
2067 pReNative->uCheckIrqSeqNo = 0;
2068 pReNative->uTlbSeqNo = 0;
2069#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
2070 pReNative->fSkippingEFlags = 0;
2071#endif
2072#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2073 pReNative->PostponedEfl.fEFlags = 0;
2074 pReNative->PostponedEfl.enmOp = kIemNativePostponedEflOp_Invalid;
2075 pReNative->PostponedEfl.cOpBits = 0;
2076 pReNative->PostponedEfl.idxReg1 = UINT8_MAX;
2077 pReNative->PostponedEfl.idxReg2 = UINT8_MAX;
2078#endif
2079
2080#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2081 pReNative->Core.offPc = 0;
2082# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(VBOX_WITH_STATISTICS)
2083 pReNative->idxInstrPlusOneOfLastPcUpdate = 0;
2084# endif
2085# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2086 pReNative->Core.fDebugPcInitialized = false;
2087# endif
2088#endif
2089#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2090 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2091#endif
2092 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2093#if IEMNATIVE_HST_GREG_COUNT < 32
2094 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2095#endif
2096 ;
2097 pReNative->Core.bmHstRegsWithGstShadow = 0;
2098 pReNative->Core.bmGstRegShadows = 0;
2099#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2100 pReNative->Core.bmGstRegShadowDirty = 0;
2101#endif
2102 pReNative->Core.bmVars = 0;
2103 pReNative->Core.bmStack = 0;
2104 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2105 pReNative->Core.u64ArgVars = UINT64_MAX;
2106
2107 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 23);
2108 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2110 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2111 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2112 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2113 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2114 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2115 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2116 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2117 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2118 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2119 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2120 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2121 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2122 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2123 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2124 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2125 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2126 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2127 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2128 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2129 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2130 pReNative->aidxUniqueLabels[22] = UINT32_MAX;
2131
2132 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2133
2134 /* Full host register reinit: */
2135 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2136 {
2137 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2138 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2139 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2140 }
2141
2142 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2143 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2144#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2145 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2146#endif
2147#ifdef IEMNATIVE_REG_FIXED_TMP0
2148 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2149#endif
2150#ifdef IEMNATIVE_REG_FIXED_TMP1
2151 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2152#endif
2153#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2154 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2155#endif
2156 );
2157 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2158 {
2159 fRegs &= ~RT_BIT_32(idxReg);
2160 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2161 }
2162
2163 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2164#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2165 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2166#endif
2167#ifdef IEMNATIVE_REG_FIXED_TMP0
2168 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2169#endif
2170#ifdef IEMNATIVE_REG_FIXED_TMP1
2171 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2172#endif
2173#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2174 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2175#endif
2176
2177#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2178 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2179# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2180 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2181# endif
2182 ;
2183 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2184 pReNative->Core.bmGstSimdRegShadows = 0;
2185 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2186 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2187
2188 /* Full host register reinit: */
2189 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2190 {
2191 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2192 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2193 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2194 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2195 }
2196
2197 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2198 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2199 {
2200 fRegs &= ~RT_BIT_32(idxReg);
2201 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2202 }
2203
2204#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2205 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2206#endif
2207
2208#endif
2209
2210 return pReNative;
2211}
2212
2213
2214/**
2215 * Used when done emitting the per-chunk code and for iemNativeInit bailout.
2216 */
2217static void iemNativeTerm(PIEMRECOMPILERSTATE pReNative)
2218{
2219 RTMemFree(pReNative->pInstrBuf);
2220 RTMemFree(pReNative->paLabels);
2221 RTMemFree(pReNative->paFixups);
2222 RTMemFree(pReNative->paTbExitFixups);
2223#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2224 RTMemFree(pReNative->pDbgInfo);
2225#endif
2226 RTMemFree(pReNative);
2227}
2228
2229
2230/**
2231 * Allocates and initializes the native recompiler state.
2232 *
2233 * This is called the first time an EMT wants to recompile something.
2234 *
2235 * @returns Pointer to the new recompiler state.
2236 * @param pVCpu The cross context virtual CPU structure of the calling
2237 * thread.
2238 * @param pTb The TB that's about to be recompiled. When this is NULL,
2239 * the recompiler state is for emitting the common per-chunk
2240 * code from iemNativeRecompileAttachExecMemChunkCtx.
2241 * @thread EMT(pVCpu)
2242 */
2243static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2244{
2245 VMCPU_ASSERT_EMT(pVCpu);
2246
2247 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2248 AssertReturn(pReNative, NULL);
2249
2250 /*
2251 * Try allocate all the buffers and stuff we need.
2252 */
2253 uint32_t const cFactor = pTb ? 1 : 32 /* per-chunk stuff doesn't really need anything but the code buffer */;
2254 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2255 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K / cFactor);
2256 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K / cFactor);
2257 pReNative->paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemAllocZ(sizeof(IEMNATIVEEXITFIXUP) * _8K / cFactor);
2258#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2259 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K / cFactor]));
2260#endif
2261 if (RT_LIKELY( pReNative->pInstrBuf
2262 && pReNative->paLabels
2263 && pReNative->paFixups
2264 && pReNative->paTbExitFixups)
2265#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2266 && pReNative->pDbgInfo
2267#endif
2268 )
2269 {
2270 /*
2271 * Set the buffer & array sizes on success.
2272 */
2273 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2274 pReNative->cLabelsAlloc = _8K / cFactor;
2275 pReNative->cFixupsAlloc = _16K / cFactor;
2276 pReNative->cTbExitFixupsAlloc = _8K / cFactor;
2277#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2278 pReNative->cDbgInfoAlloc = _16K / cFactor;
2279#endif
2280
2281 /* Other constant stuff: */
2282 pReNative->pVCpu = pVCpu;
2283
2284 /*
2285 * Done, just reinit it.
2286 */
2287 return iemNativeReInit(pReNative, pTb);
2288 }
2289
2290 /*
2291 * Failed. Cleanup and return.
2292 */
2293 AssertFailed();
2294 iemNativeTerm(pReNative);
2295 return NULL;
2296}
2297
2298
2299/**
2300 * Creates a label.
2301 *
2302 * If the label does not yet have a defined position,
2303 * call iemNativeLabelDefine() later to set it.
2304 *
2305 * @returns Label ID. Throws VBox status code on failure, so no need to check
2306 * the return value.
2307 * @param pReNative The native recompile state.
2308 * @param enmType The label type.
2309 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2310 * label is not yet defined (default).
2311 * @param uData Data associated with the label. Only applicable to
2312 * certain types of labels. Default is zero.
2313 */
2314DECL_HIDDEN_THROW(uint32_t)
2315iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2316 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2317{
2318 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2319#if defined(RT_ARCH_AMD64)
2320 Assert(enmType >= kIemNativeLabelType_LoopJumpTarget);
2321#endif
2322
2323 /*
2324 * Locate existing label definition.
2325 *
2326 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2327 * and uData is zero.
2328 */
2329 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2330 uint32_t const cLabels = pReNative->cLabels;
2331 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2332#ifndef VBOX_STRICT
2333 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2334 && offWhere == UINT32_MAX
2335 && uData == 0
2336#endif
2337 )
2338 {
2339#ifndef VBOX_STRICT
2340 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2341 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2342 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2343 if (idxLabel < pReNative->cLabels)
2344 return idxLabel;
2345#else
2346 for (uint32_t i = 0; i < cLabels; i++)
2347 if ( paLabels[i].enmType == enmType
2348 && paLabels[i].uData == uData)
2349 {
2350 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2351 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2352 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2353 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2354 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2355 return i;
2356 }
2357 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2358 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2359#endif
2360 }
2361
2362 /*
2363 * Make sure we've got room for another label.
2364 */
2365 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2366 { /* likely */ }
2367 else
2368 {
2369 uint32_t cNew = pReNative->cLabelsAlloc;
2370 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2371 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2372 cNew *= 2;
2373 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
2374 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2375 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2376 pReNative->paLabels = paLabels;
2377 pReNative->cLabelsAlloc = cNew;
2378 }
2379
2380 /*
2381 * Define a new label.
2382 */
2383 paLabels[cLabels].off = offWhere;
2384 paLabels[cLabels].enmType = enmType;
2385 paLabels[cLabels].uData = uData;
2386 pReNative->cLabels = cLabels + 1;
2387
2388 Assert((unsigned)enmType < 64);
2389 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2390
2391 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2392 {
2393 Assert(uData == 0);
2394 pReNative->aidxUniqueLabels[enmType] = cLabels;
2395 }
2396
2397 if (offWhere != UINT32_MAX)
2398 {
2399#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2400 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2401 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2402#endif
2403 }
2404 return cLabels;
2405}
2406
2407
2408/**
2409 * Defines the location of an existing label.
2410 *
2411 * @param pReNative The native recompile state.
2412 * @param idxLabel The label to define.
2413 * @param offWhere The position.
2414 */
2415DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2416{
2417 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2418 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2419 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2420 pLabel->off = offWhere;
2421#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2422 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2423 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2424#endif
2425}
2426
2427
2428/**
2429 * Looks up a label.
2430 *
2431 * @returns Label ID if found, UINT32_MAX if not.
2432 */
2433DECLHIDDEN(uint32_t) iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2434 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
2435{
2436 Assert((unsigned)enmType < 64);
2437 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2438 {
2439 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2440 return pReNative->aidxUniqueLabels[enmType];
2441
2442 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2443 uint32_t const cLabels = pReNative->cLabels;
2444 for (uint32_t i = 0; i < cLabels; i++)
2445 if ( paLabels[i].enmType == enmType
2446 && paLabels[i].uData == uData
2447 && ( paLabels[i].off == offWhere
2448 || offWhere == UINT32_MAX
2449 || paLabels[i].off == UINT32_MAX))
2450 return i;
2451 }
2452 return UINT32_MAX;
2453}
2454
2455
2456/**
2457 * Adds a fixup.
2458 *
2459 * @throws VBox status code (int) on failure.
2460 * @param pReNative The native recompile state.
2461 * @param offWhere The instruction offset of the fixup location.
2462 * @param idxLabel The target label ID for the fixup.
2463 * @param enmType The fixup type.
2464 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2465 */
2466DECL_HIDDEN_THROW(void)
2467iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2468 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2469{
2470 Assert(idxLabel <= UINT16_MAX);
2471 Assert((unsigned)enmType <= UINT8_MAX);
2472#ifdef RT_ARCH_ARM64
2473 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2474 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2475 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2476#endif
2477
2478 /*
2479 * Make sure we've room.
2480 */
2481 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2482 uint32_t const cFixups = pReNative->cFixups;
2483 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2484 { /* likely */ }
2485 else
2486 {
2487 uint32_t cNew = pReNative->cFixupsAlloc;
2488 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2489 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2490 cNew *= 2;
2491 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2492 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2493 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2494 pReNative->paFixups = paFixups;
2495 pReNative->cFixupsAlloc = cNew;
2496 }
2497
2498 /*
2499 * Add the fixup.
2500 */
2501 paFixups[cFixups].off = offWhere;
2502 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2503 paFixups[cFixups].enmType = enmType;
2504 paFixups[cFixups].offAddend = offAddend;
2505 pReNative->cFixups = cFixups + 1;
2506}
2507
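/*
 * Usage sketch (an assumed illustration, not lifted from the emitters) of the label
 * and fixup API above: create a forward label, record a fixup for the jump that needs
 * patching, then define the label once its position is known.
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);  // off stays UINT32_MAX for now
 *      iemNativeAddFixup(pReNative, offJmpInstr, idxLabel, enmFixupType);        // remember the jump to patch
 *      ...
 *      iemNativeLabelDefine(pReNative, idxLabel, offTarget);                     // resolves the fixup target
 *
 * Here enmLabelType, enmFixupType, offJmpInstr and offTarget are placeholders; the
 * real emitters supply the concrete label/fixup kinds and code offsets.
 */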
2508
2509/**
2510 * Adds a fixup to the per-chunk tail code.
2511 *
2512 * @throws VBox status code (int) on failure.
2513 * @param pReNative The native recompile state.
2514 * @param offWhere The instruction offset of the fixup location.
2515 * @param enmExitReason The exit reason to jump to.
2516 */
2517DECL_HIDDEN_THROW(void)
2518iemNativeAddTbExitFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, IEMNATIVELABELTYPE enmExitReason)
2519{
2520 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
2521
2522 /*
2523 * Make sure we've room.
2524 */
2525 PIEMNATIVEEXITFIXUP paTbExitFixups = pReNative->paTbExitFixups;
2526 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
2527 if (RT_LIKELY(cTbExitFixups < pReNative->cTbExitFixupsAlloc))
2528 { /* likely */ }
2529 else
2530 {
2531 uint32_t cNew = pReNative->cTbExitFixupsAlloc;
2532 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2533 AssertStmt(cTbExitFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2534 cNew *= 2;
2535 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2536 paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemRealloc(paTbExitFixups, cNew * sizeof(paTbExitFixups[0]));
2537 AssertStmt(paTbExitFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2538 pReNative->paTbExitFixups = paTbExitFixups;
2539 pReNative->cTbExitFixupsAlloc = cNew;
2540 }
2541
2542 /*
2543 * Add the fixup.
2544 */
2545 paTbExitFixups[cTbExitFixups].off = offWhere;
2546 paTbExitFixups[cTbExitFixups].enmExitReason = enmExitReason;
2547 pReNative->cTbExitFixups = cTbExitFixups + 1;
2548}
2549
2550
2551/**
2552 * Slow code path for iemNativeInstrBufEnsure.
2553 */
2554DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2555{
2556 /* Double the buffer size till we meet the request. */
2557 uint32_t cNew = pReNative->cInstrBufAlloc;
2558 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2559 do
2560 cNew *= 2;
2561 while (cNew < off + cInstrReq);
2562
2563 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2564#ifdef RT_ARCH_ARM64
2565 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2566#else
2567 uint32_t const cbMaxInstrBuf = _2M;
2568#endif
2569 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2570
2571 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2572 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2573
2574#ifdef VBOX_STRICT
2575 pReNative->offInstrBufChecked = off + cInstrReq;
2576#endif
2577 pReNative->cInstrBufAlloc = cNew;
2578 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2579}
2580
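/*
 * Worked example of the doubling above (assuming IEMNATIVEINSTR is a 4-byte unit, as
 * the fixed ARM64 instruction size suggests): the initial 64 KiB allocation gives
 * cInstrBufAlloc = 16384.  A request with off + cInstrReq = 40000 doubles twice
 * (16384 -> 32768 -> 65536), i.e. a 256 KiB reallocation, still well below the
 * 1 MiB ARM64 cap enforced via cbMaxInstrBuf.
 */
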
2581#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2582
2583/**
2584 * Grows the static debug info array used during recompilation.
2585 *
2586 * @returns Pointer to the new debug info block; throws VBox status code on
2587 * failure, so no need to check the return value.
2588 */
2589DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2590{
2591 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2592 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2593 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2594 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2595 pReNative->pDbgInfo = pDbgInfo;
2596 pReNative->cDbgInfoAlloc = cNew;
2597 return pDbgInfo;
2598}
2599
2600
2601/**
2602 * Adds a new debug info uninitialized entry, returning the pointer to it.
2603 */
2604DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2605{
2606 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2607 { /* likely */ }
2608 else
2609 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2610 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2611}
2612
2613
2614/**
2615 * Debug Info: Adds a native offset record, if necessary.
2616 */
2617DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2618{
2619 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2620
2621 /*
2622 * Do we need this one?
2623 */
2624 uint32_t const offPrev = pDbgInfo->offNativeLast;
2625 if (offPrev == off)
2626 return;
2627 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2628
2629 /*
2630 * Add it.
2631 */
2632 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2633 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2634 pEntry->NativeOffset.offNative = off;
2635 pDbgInfo->offNativeLast = off;
2636}
2637
2638
2639/**
2640 * Debug Info: Record info about a label.
2641 */
2642static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2643{
2644 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2645 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2646 pEntry->Label.uUnused = 0;
2647 pEntry->Label.enmLabel = (uint8_t)enmType;
2648 pEntry->Label.uData = uData;
2649}
2650
2651
2652/**
2653 * Debug Info: Record info about a threaded call.
2654 */
2655static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2656{
2657 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2658 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2659 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2660 pEntry->ThreadedCall.uUnused = 0;
2661 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2662}
2663
2664
2665/**
2666 * Debug Info: Record info about a new guest instruction.
2667 */
2668static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2669{
2670 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2671 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2672 pEntry->GuestInstruction.uUnused = 0;
2673 pEntry->GuestInstruction.fExec = fExec;
2674}
2675
2676
2677/**
2678 * Debug Info: Record info about guest register shadowing.
2679 */
2680DECL_HIDDEN_THROW(void)
2681iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2682 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2683{
2684 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2685 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2686 pEntry->GuestRegShadowing.uUnused = 0;
2687 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2688 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2689 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2690#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2691 Assert( idxHstReg != UINT8_MAX
2692 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2693#endif
2694}
2695
2696
2697# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2698/**
2699 * Debug Info: Record info about guest register shadowing.
2700 */
2701DECL_HIDDEN_THROW(void)
2702iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2703 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2704{
2705 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2706 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2707 pEntry->GuestSimdRegShadowing.uUnused = 0;
2708 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2709 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2710 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2711}
2712# endif
2713
2714
2715# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2716/**
2717 * Debug Info: Record info about delayed RIP updates.
2718 */
2719DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint64_t offPc, uint32_t cInstrSkipped)
2720{
2721 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2722 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2723 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2724 pEntry->DelayedPcUpdate.offPc = offPc; /** @todo support larger values */
2725}
2726# endif
2727
2728# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2729
2730/**
2731 * Debug Info: Record info about a dirty guest register.
2732 */
2733DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2734 uint8_t idxGstReg, uint8_t idxHstReg)
2735{
2736 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2737 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2738 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2739 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2740 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2741}
2742
2743
2744/**
2745 * Debug Info: Record info about a dirty guest register writeback operation.
2746 */
2747DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2748{
2749 unsigned const cBitsGstRegMask = 25;
2750 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2751
2752 /* The first block of 25 bits: */
2753 if (fGstReg & fGstRegMask)
2754 {
2755 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2756 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2757 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2758 pEntry->GuestRegWriteback.cShift = 0;
2759 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2760 fGstReg &= ~(uint64_t)fGstRegMask;
2761 if (!fGstReg)
2762 return;
2763 }
2764
2765 /* The second block of 25 bits: */
2766 fGstReg >>= cBitsGstRegMask;
2767 if (fGstReg & fGstRegMask)
2768 {
2769 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2770 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2771 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2772 pEntry->GuestRegWriteback.cShift = 1;
2773 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2774 fGstReg &= ~(uint64_t)fGstRegMask;
2775 if (!fGstReg)
2776 return;
2777 }
2778
2779 /* The last block with 14 bits: */
2780 fGstReg >>= cBitsGstRegMask;
2781 Assert(fGstReg & fGstRegMask);
2782 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2783 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2784 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2785 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2786 pEntry->GuestRegWriteback.cShift = 2;
2787 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2788}
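
/*
 * Worked example of the encoding above (illustrative, using arbitrary bit numbers):
 * a dirty mask with bits 3, 30 and 55 set produces three entries.  Bits 0..24 go out
 * with cShift=0 (here fGstReg = RT_BIT_32(3)), bits 25..49 with cShift=1 (bit 30
 * becomes bit 5 of that chunk), and the remaining 14 bits 50..63 with cShift=2
 * (bit 55 becomes bit 5 of the last chunk).
 */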
2789
2790# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2791
2792# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2793/**
2794 * Debug Info: Record info about emitting a postponed EFLAGS calculation.
2795 */
2796DECL_HIDDEN_THROW(void)
2797iemNativeDbgInfoAddPostponedEFlagsCalc(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVE_POSTPONED_EFL_OP_T enmOp,
2798 uint8_t cOpBits, uint8_t idxEmit)
2799{
2800 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2801 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2802 pEntry->PostponedEflCalc.uType = kIemTbDbgEntryType_PostponedEFlagsCalc;
2803 pEntry->PostponedEflCalc.enmOp = (unsigned)enmOp;
2804 pEntry->PostponedEflCalc.cOpBits = cOpBits;
2805 pEntry->PostponedEflCalc.idxEmit = idxEmit;
2806 pEntry->PostponedEflCalc.uUnused = 0;
2807}
2808# endif /* IEMNATIVE_WITH_EFLAGS_POSTPONING */
2809
2810#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2811
2812
2813/*********************************************************************************************************************************
2814* Register Allocator *
2815*********************************************************************************************************************************/
2816
2817/**
2818 * Register parameter indexes (indexed by argument number).
2819 */
2820DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2821{
2822 IEMNATIVE_CALL_ARG0_GREG,
2823 IEMNATIVE_CALL_ARG1_GREG,
2824 IEMNATIVE_CALL_ARG2_GREG,
2825 IEMNATIVE_CALL_ARG3_GREG,
2826#if defined(IEMNATIVE_CALL_ARG4_GREG)
2827 IEMNATIVE_CALL_ARG4_GREG,
2828# if defined(IEMNATIVE_CALL_ARG5_GREG)
2829 IEMNATIVE_CALL_ARG5_GREG,
2830# if defined(IEMNATIVE_CALL_ARG6_GREG)
2831 IEMNATIVE_CALL_ARG6_GREG,
2832# if defined(IEMNATIVE_CALL_ARG7_GREG)
2833 IEMNATIVE_CALL_ARG7_GREG,
2834# endif
2835# endif
2836# endif
2837#endif
2838};
2839AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2840
2841/**
2842 * Call register masks indexed by argument count.
2843 */
2844DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2845{
2846 0,
2847 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2848 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2849 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2850 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2851 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2852#if defined(IEMNATIVE_CALL_ARG4_GREG)
2853 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2854 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2855# if defined(IEMNATIVE_CALL_ARG5_GREG)
2856 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2857 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2858# if defined(IEMNATIVE_CALL_ARG6_GREG)
2859 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2860 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2861 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2862# if defined(IEMNATIVE_CALL_ARG7_GREG)
2863 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2864 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2865 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2866# endif
2867# endif
2868# endif
2869#endif
2870};
2871
2872#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2873/**
2874 * BP offset of the stack argument slots.
2875 *
2876 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2877 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2878 */
2879DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2880{
2881 IEMNATIVE_FP_OFF_STACK_ARG0,
2882# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2883 IEMNATIVE_FP_OFF_STACK_ARG1,
2884# endif
2885# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2886 IEMNATIVE_FP_OFF_STACK_ARG2,
2887# endif
2888# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2889 IEMNATIVE_FP_OFF_STACK_ARG3,
2890# endif
2891};
2892AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2893#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2894
2895/**
2896 * Info about shadowed guest register values.
2897 * @see IEMNATIVEGSTREG
2898 */
2899DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2900{
2901#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2902 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2903 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2904 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2905 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2906 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2907 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2908 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2909 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2910 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2911 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2912 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2913 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2914 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2915 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2916 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2917 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2918 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2919 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2920 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2921 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2922 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2923 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2924 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2925 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2926 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2927 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2928 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2929 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2930 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2931 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2932 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2933 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2934 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2935 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2936 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2937 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2938 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2939 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2940 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2941 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2942 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2943 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2944 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2945 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2946 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2947 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2948 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2949 /* [kIemNativeGstReg_EFlags.Cf] = */ { UINT32_MAX, 0, "efl.cf", },
2950 /* [kIemNativeGstReg_EFlags.Of] = */ { UINT32_MAX, 0, "efl.of", },
2951 /* [kIemNativeGstReg_EFlags.Af] = */ { UINT32_MAX, 0, "efl.af", },
2952 /* [kIemNativeGstReg_EFlags.Zf] = */ { UINT32_MAX, 0, "efl.zf", },
2953 /* [kIemNativeGstReg_EFlags.Sf] = */ { UINT32_MAX, 0, "efl.sf", },
2954 /* [kIemNativeGstReg_EFlags.Pf] = */ { UINT32_MAX, 0, "efl.pf", },
2955 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2956#undef CPUMCTX_OFF_AND_SIZE
2957};
2958AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2959
2960
2961/** Host CPU general purpose register names. */
2962DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2963{
2964#ifdef RT_ARCH_AMD64
2965 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2966#elif RT_ARCH_ARM64
2967 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2968 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2969#else
2970# error "port me"
2971#endif
2972};
2973
2974
2975#if 0 /* unused */
2976/**
2977 * Tries to locate a suitable register in the given register mask.
2978 *
2979 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2980 * failed.
2981 *
2982 * @returns Host register number on success, returns UINT8_MAX on failure.
2983 */
2984static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2985{
2986 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2987 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2988 if (fRegs)
2989 {
2990 /** @todo pick better here: */
2991 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2992
2993 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2994 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2995 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2996 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2997
2998 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2999 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3000 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3001 return idxReg;
3002 }
3003 return UINT8_MAX;
3004}
3005#endif /* unused */
3006
3007#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3008
3009/**
3010 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
3011 *
3012 * @returns New code buffer offset on success, UINT32_MAX on failure.
3013 * @param pReNative The native recompile state.
3014 * @param off The current code buffer position.
3015 * @param enmGstReg The guest register to store to.
3016 * @param idxHstReg The host register to store from.
3017 */
3018DECL_FORCE_INLINE_THROW(uint32_t)
3019iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
3020{
3021 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3022 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3023
3024 switch (g_aGstShadowInfo[enmGstReg].cb)
3025 {
3026 case sizeof(uint64_t):
3027 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3028 case sizeof(uint32_t):
3029 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3030 case sizeof(uint16_t):
3031 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3032# if 0 /* not present in the table. */
3033 case sizeof(uint8_t):
3034 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3035# endif
3036 default:
3037 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3038 }
3039}
3040
3041
3042/**
3043 * Emits code to flush a pending write of the given guest register,
3044 * version with alternative core state.
3045 *
3046 * @returns New code buffer offset.
3047 * @param pReNative The native recompile state.
3048 * @param off Current code buffer position.
3049 * @param pCore Alternative core state.
3050 * @param enmGstReg The guest register to flush.
3051 */
3052DECL_HIDDEN_THROW(uint32_t)
3053iemNativeRegFlushPendingWriteEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVECORESTATE pCore, IEMNATIVEGSTREG enmGstReg)
3054{
3055 uint8_t const idxHstReg = pCore->aidxGstRegShadows[enmGstReg];
3056
3057 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3058 && enmGstReg <= kIemNativeGstReg_GprLast)
3059 || enmGstReg == kIemNativeGstReg_MxCsr);
3060 Assert( idxHstReg != UINT8_MAX
3061 && pCore->bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3062 Log12(("iemNativeRegFlushPendingWriteEx: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3063 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3064
3065 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3066
3067 pCore->bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3068 return off;
3069}
3070
3071
3072/**
3073 * Emits code to flush a pending write of the given guest register.
3074 *
3075 * @returns New code buffer offset.
3076 * @param pReNative The native recompile state.
3077 * @param off Current code buffer position.
3078 * @param enmGstReg The guest register to flush.
3079 */
3080DECL_HIDDEN_THROW(uint32_t)
3081iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
3082{
3083 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3084
3085 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3086 && enmGstReg <= kIemNativeGstReg_GprLast)
3087 || enmGstReg == kIemNativeGstReg_MxCsr);
3088 Assert( idxHstReg != UINT8_MAX
3089 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3090 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3091 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3092
3093 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3094
3095 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3096 return off;
3097}
3098
3099
3100/**
3101 * Flush the given set of guest registers if marked as dirty.
3102 *
3103 * @returns New code buffer offset.
3104 * @param pReNative The native recompile state.
3105 * @param off Current code buffer position.
3106 * @param fFlushGstReg The guest register set to flush (default is flush everything).
3107 * @note Must not modify the host status flags!
3108 */
3109DECL_HIDDEN_THROW(uint32_t)
3110iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
3111{
3112 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
3113 if (bmGstRegShadowDirty)
3114 {
3115# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3116 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3117 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
3118# endif
3119 do
3120 {
3121 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3122 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3123 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3124 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3125 } while (bmGstRegShadowDirty);
3126 }
3127
3128 return off;
3129}
3130
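/*
 * Usage sketch (illustrative only, not built): a caller would typically flush
 * dirty guest shadow copies back to CPUMCTX before emitting code that accesses
 * the context directly or calls a helper.  The wrapper function name below is
 * made up; only iemNativeRegFlushDirtyGuest() above is real.
 */
#if 0 /* illustrative example */
static uint32_t iemNativeExampleFlushBeforeHelper(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Write back every dirty guest register shadow... */
    off = iemNativeRegFlushDirtyGuest(pReNative, off, UINT64_MAX /* everything */);

    /* ...or restrict the writeback, e.g. to MXCSR only. */
    off = iemNativeRegFlushDirtyGuest(pReNative, off, RT_BIT_64(kIemNativeGstReg_MxCsr));

    return off;
}
#endif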
3131
3132/**
3133 * Flush all shadowed guest registers marked as dirty for the given host register.
3134 *
3135 * @returns New code buffer offset.
3136 * @param pReNative The native recompile state.
3137 * @param off Current code buffer position.
3138 * @param idxHstReg The host register.
3139 *
3140 * @note This doesn't do any unshadowing of guest registers from the host register.
3141 *
3142 * @note Must not modify the host status flags!
3143 */
3144DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3145{
3146 /* We need to flush any pending guest register writes this host register shadows. */
3147 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3148 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3149 {
3150# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3151 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3152 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3153# endif
3154 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3155 do
3156 {
3157 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3158 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3159 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3160 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3161 } while (bmGstRegShadowDirty);
3162 }
3163
3164 return off;
3165}
3166
3167#endif /* IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK */
3168
3169
3170/**
3171 * Locate a register, possibly freeing one up.
3172 *
3173 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3174 * failed.
3175 *
3176 * @returns Host register number on success. Returns UINT8_MAX if no register is
3177 * found; the caller is supposed to deal with this and raise an
3178 * allocation-type specific status code (if desired).
3179 *
3180 * @throws VBox status code if we run into trouble spilling a variable or
3181 * recording debug info. Does NOT throw anything if we're out of
3182 * registers, though.
3183 *
3184 * @note Must not modify the host status flags!
3185 */
3186static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3187 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3188{
3189 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3190 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3191 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3192
3193 /*
3194 * Try a freed register that's shadowing a guest register.
3195 */
3196 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3197 if (fRegs)
3198 {
3199 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3200
3201#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3202 /*
3203 * When we have liveness information, we use it to kick out all shadowed
3204 * guest registers that will not be needed any more in this TB. If we're
3205 * lucky, this may prevent us from ending up here again.
3206 *
3207 * Note! We must consider the previous entry here so we don't free
3208 * anything that the current threaded function requires (current
3209 * entry is produced by the next threaded function).
3210 */
3211 uint32_t const idxCurCall = pReNative->idxCurCall;
3212 if (idxCurCall > 0)
3213 {
3214 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3215 uint64_t fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
3216
3217 /* Merge EFLAGS. */
3218 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3219 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3220 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3221 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3222 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3223
3224 /* If it matches any shadowed registers. */
3225 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3226 {
3227#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3228 /* Writeback any dirty shadow registers we are about to unshadow. */
3229 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3230#endif
3231
3232 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3233 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3234 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3235
3236 /* See if we've got any unshadowed registers we can return now. */
3237 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3238 if (fUnshadowedRegs)
3239 {
3240 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3241 return (fPreferVolatile
3242 ? ASMBitFirstSetU32(fUnshadowedRegs)
3243 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3244 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3245 - 1;
3246 }
3247 }
3248 }
3249#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3250
3251 unsigned const idxReg = (fPreferVolatile
3252 ? ASMBitFirstSetU32(fRegs)
3253 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3254 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3255 - 1;
3256
3257 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3258 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3259 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3260 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3261
3262#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3263 /* We need to flush any pending guest register writes this host register shadows. */
3264 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3265#endif
3266
3267 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3268 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3269 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3270 return idxReg;
3271 }
3272
3273 /*
3274 * Try free up a variable that's in a register.
3275 *
3276 * We do two rounds here, first evacuating variables we don't need to be
3277 * saved on the stack, then in the second round move things to the stack.
3278 */
3279 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3280 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3281 {
3282 uint32_t fVars = pReNative->Core.bmVars;
3283 while (fVars)
3284 {
3285 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3286 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3287#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3288 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
3289 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit before skipping, otherwise this loop would never terminate. */
3290#endif
3291
3292 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3293 && (RT_BIT_32(idxReg) & fRegMask)
3294 && ( iLoop == 0
3295 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3296 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3297 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3298 {
3299 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3300 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3301 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3302 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3303 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3304 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3305#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3306 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3307#endif
3308
3309 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3310 {
3311 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3312 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3313 }
3314
3315 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3316 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3317
3318 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3319 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3320 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3321 return idxReg;
3322 }
3323 fVars &= ~RT_BIT_32(idxVar);
3324 }
3325 }
3326
3327 return UINT8_MAX;
3328}
3329
3330
3331/**
3332 * Reassigns a variable to a different register specified by the caller.
3333 *
3334 * @returns The new code buffer position.
3335 * @param pReNative The native recompile state.
3336 * @param off The current code buffer position.
3337 * @param idxVar The variable index.
3338 * @param idxRegOld The old host register number.
3339 * @param idxRegNew The new host register number.
3340 * @param pszCaller The caller for logging.
3341 */
3342static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3343 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3344{
3345 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3346 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3347#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3348 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3349#endif
3350 RT_NOREF(pszCaller);
3351
3352#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3353 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3354#endif
3355 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3356
3357 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3358#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3359 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3360#endif
3361 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3362 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3363 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3364
3365 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3366 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3367 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3368 if (fGstRegShadows)
3369 {
3370 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3371 | RT_BIT_32(idxRegNew);
3372 while (fGstRegShadows)
3373 {
3374 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3375 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3376
3377 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3378 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3379 }
3380 }
3381
3382 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3383 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3384 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3385 return off;
3386}
3387
3388
3389/**
3390 * Moves a variable to a different register or spills it onto the stack.
3391 *
3392 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3393 * kinds can easily be recreated if needed later.
3394 *
3395 * @returns The new code buffer position.
3396 * @param pReNative The native recompile state.
3397 * @param off The current code buffer position.
3398 * @param idxVar The variable index.
3399 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3400 * call-volatile registers.
3401 */
3402DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3403 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3404{
3405 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3406 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3407 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3408 Assert(!pVar->fRegAcquired);
3409
3410 uint8_t const idxRegOld = pVar->idxReg;
3411 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3412 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3413 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3414 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3415 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3416 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3417 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3418 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3419#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3420 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3421#endif
3422
3423
3424 /** @todo Add statistics on this.*/
3425 /** @todo Implement basic variable liveness analysis (python) so variables
3426 * can be freed immediately once they are no longer used. Without it we risk
3427 * wasting registers and stack space on dead variables.
3428 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3429
3430 /*
3431 * First try move it to a different register, as that's cheaper.
3432 */
3433 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3434 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3435 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3436 if (fRegs)
3437 {
3438 /* Avoid using shadow registers, if possible. */
3439 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3440 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3441 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3442 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3443 }
3444
3445 /*
3446 * Otherwise we must spill the register onto the stack.
3447 */
3448 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3449 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3450 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3451 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3452
3453 pVar->idxReg = UINT8_MAX;
3454 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3455 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3456 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3457 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3458 return off;
3459}
3460
3461
3462/**
3463 * Allocates a temporary host general purpose register.
3464 *
3465 * This may emit code to save register content onto the stack in order to free
3466 * up a register.
3467 *
3468 * @returns The host register number; throws VBox status code on failure,
3469 * so no need to check the return value.
3470 * @param pReNative The native recompile state.
3471 * @param poff Pointer to the variable with the code buffer position.
3472 * This will be updated if we need to move a variable from
3473 * register to stack in order to satisfy the request.
3474 * @param a_fPreferVolatile Whether to prefer volatile over non-volatile
3475 * registers (@c true, default) or the other way around
3476 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3477 *
3478 * @note Must not modify the host status flags!
3479 */
3480template<bool const a_fPreferVolatile>
3481DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3482{
3483 /*
3484 * Try find a completely unused register, preferably a call-volatile one.
3485 */
3486 uint8_t idxReg;
3487 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3488 & ~pReNative->Core.bmHstRegsWithGstShadow
3489 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3490 if (fRegs)
3491 {
3492 if (a_fPreferVolatile)
3493 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3494 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3495 else
3496 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3497 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3498 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3499 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3500 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3501 }
3502 else
3503 {
3504 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile);
3505 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3506 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3507 }
3508 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3509}
3510
3511
3512/** See iemNativeRegAllocTmpInt for details. */
3513DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3514{
3515 return iemNativeRegAllocTmpInt<true>(pReNative, poff);
3516}
3517
3518
3519/** See iemNativeRegAllocTmpInt for details. */
3520DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpPreferNonVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3521{
3522 return iemNativeRegAllocTmpInt<false>(pReNative, poff);
3523}
3524
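/*
 * Usage sketch (illustrative only, not built): allocate a scratch register,
 * use it, and release it.  The wrapper name and idxOtherReg parameter are made
 * up; the allocator and emitter calls are the ones defined in this file.
 */
#if 0 /* illustrative example */
static uint32_t iemNativeExampleUseTmpReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxOtherReg)
{
    /* Grab a temporary host register, preferring call-volatile ones. */
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);

    /* Use it - here simply copying another host register into it. */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxTmpReg, idxOtherReg);

    /* Release it again when done. */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
    return off;
}
#endif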
3525
3526/**
3527 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
3528 * registers.
3529 *
3530 * @returns The host register number; throws VBox status code on failure,
3531 * so no need to check the return value.
3532 * @param pReNative The native recompile state.
3533 * @param poff Pointer to the variable with the code buffer position.
3534 * This will be updated if we need to move a variable from
3535 * register to stack in order to satisfy the request.
3536 * @param fRegMask Mask of acceptable registers.
3537 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3538 * registers (@c true, default) or the other way around
3539 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3540 */
3541template<bool const a_fPreferVolatile>
3542DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpExInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3543{
3544 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3545 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3546
3547 /*
3548 * Try find a completely unused register, preferably a call-volatile one.
3549 */
3550 uint8_t idxReg;
3551 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3552 & ~pReNative->Core.bmHstRegsWithGstShadow
3553 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3554 & fRegMask;
3555 if (fRegs)
3556 {
3557 if RT_CONSTEXPR_IF(a_fPreferVolatile)
3558 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3559 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3560 else
3561 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3562 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3563 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3564 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3565 Log12(("iemNativeRegAllocTmpExInt: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3566 }
3567 else
3568 {
3569 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile, fRegMask);
3570 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3571 Log12(("iemNativeRegAllocTmpExInt: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3572 }
3573 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3574}
3575
3576
3577/** See iemNativeRegAllocTmpExInt for details. */
3578DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3579{
3580 return iemNativeRegAllocTmpExInt<true>(pReNative, poff, fRegMask);
3581}
3582
3583
3584/** See iemNativeRegAllocTmpExInt for details. */
3585DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpExPreferNonVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3586{
3587 return iemNativeRegAllocTmpExInt<false>(pReNative, poff, fRegMask);
3588}
3589
3590
3591/** Internal templated variation of iemNativeRegAllocTmpEx. */
3592template<uint32_t const a_fRegMask, bool const a_fPreferVolatile>
3593DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpExInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3594{
3595 AssertCompile(!(a_fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3596 AssertCompile(!(a_fRegMask & IEMNATIVE_REG_FIXED_MASK));
3597
3598 /*
3599 * Try find a completely unused register, preferably a call-volatile one.
3600 */
3601 uint8_t idxReg;
3602 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3603 & ~pReNative->Core.bmHstRegsWithGstShadow
3604 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3605 & a_fRegMask;
3606 if (fRegs)
3607 {
3608 if RT_CONSTEXPR_IF(a_fPreferVolatile)
3609 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3610 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3611 else
3612 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3613 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3614 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3615 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3616 Log12(("iemNativeRegAllocTmpExInt: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3617 }
3618 else
3619 {
3620 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile, a_fRegMask);
3621 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3622 Log12(("iemNativeRegAllocTmpExInt: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3623 }
3624 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3625}
3626
3627
3628/**
3629 * Allocates a temporary register for loading an immediate value into.
3630 *
3631 * This will emit code to load the immediate, unless there happens to be an
3632 * unused register with the value already loaded.
3633 *
3634 * The caller must not modify the returned register; it must be considered
3635 * read-only. Free using iemNativeRegFreeTmpImm.
3636 *
3637 * @returns The host register number; throws VBox status code on failure, so no
3638 * need to check the return value.
3639 * @param pReNative The native recompile state.
3640 * @param poff Pointer to the variable with the code buffer position.
3641 * @param uImm The immediate value that the register must hold upon
3642 * return.
3643 * @note Prefers volatile registers.
3644 * @note Reusing immediate values has not been implemented yet.
3645 */
3646DECL_HIDDEN_THROW(uint8_t)
3647iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm)
3648{
3649 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff);
3650 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3651 return idxReg;
3652}
3653
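/*
 * Usage sketch (illustrative only, not built): load a constant into a
 * temporary register and free it with the matching helper.  The wrapper name
 * and the constant are made up.
 */
#if 0 /* illustrative example */
static uint32_t iemNativeExampleUseTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* The register must be treated as read-only while it holds the immediate. */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xdeadbeefcafe));

    /* ... emit code that reads idxRegImm here ... */

    iemNativeRegFreeTmpImm(pReNative, idxRegImm);
    return off;
}
#endif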
3654
3655/**
3656 * Common worker for iemNativeRegAllocTmpForGuestReg() and
3657 * iemNativeRegAllocTmpForGuestEFlags().
3658 *
3659 * See iemNativeRegAllocTmpForGuestRegInt() for details.
3660 */
3661template<IEMNATIVEGSTREGUSE const a_enmIntendedUse, uint32_t const a_fRegMask>
3662static uint8_t iemNativeRegAllocTmpForGuestRegCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3663{
3664 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3665#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3666 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3667#endif
3668
3669 /*
3670 * First check if the guest register value is already in a host register.
3671 */
3672 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3673 {
3674 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3675 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3676 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3677 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3678
3679 /* It's not supposed to be allocated... */
3680 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3681 {
3682 /*
3683 * If the register will trash the guest shadow copy, try find a
3684 * completely unused register we can use instead. If that fails,
3685 * we need to disassociate the host reg from the guest reg.
3686 */
3687 /** @todo would be nice to know if preserving the register is in any way helpful. */
3688 /* If the purpose is calculations, try to duplicate the register value as
3689 we'll be clobbering the shadow. */
3690 if ( a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3691 && ( ~pReNative->Core.bmHstRegs
3692 & ~pReNative->Core.bmHstRegsWithGstShadow
3693 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3694 {
3695 uint8_t const idxRegNew = iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff);
3696
3697 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3698
3699 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3700 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3701 g_apszIemNativeHstRegNames[idxRegNew]));
3702 idxReg = idxRegNew;
3703 }
3704 /* If the current register matches the restrictions, go ahead and allocate
3705 it for the caller. */
3706 else if (a_fRegMask & RT_BIT_32(idxReg))
3707 {
3708 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3709 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3710 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3711 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3712 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n", g_apszIemNativeHstRegNames[idxReg],
3713 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3714 else
3715 {
3716 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3717 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3718 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3719 }
3720 }
3721 /* Otherwise, allocate a register that satisfies the caller and transfer
3722 the shadowing if compatible with the intended use. (This basically
3723 means the call wants a non-volatile register (RSP push/pop scenario).) */
3724 else
3725 {
3726 Assert(!(a_fRegMask & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
3727 uint8_t const idxRegNew = (a_fRegMask & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3728 && a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3729 ? iemNativeRegAllocTmpEx(pReNative, poff, a_fRegMask & ~RT_BIT_32(idxReg))
3730 : iemNativeRegAllocTmpExPreferNonVolatile(pReNative, poff, a_fRegMask & ~RT_BIT_32(idxReg));
3731 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3732 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3733 {
3734 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3735 Log12(("iemNativeRegAllocTmpForGuestReg: Transfering %s to %s for guest %s %s\n",
3736 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3737 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3738 }
3739 else
3740 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3741 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3742 g_apszIemNativeHstRegNames[idxRegNew]));
3743 idxReg = idxRegNew;
3744 }
3745 }
3746 else
3747 {
3748 /*
3749 * Oops. Shadowed guest register already allocated!
3750 *
3751 * Allocate a new register, copy the value and, if updating, the
3752 * guest shadow copy assignment to the new register.
3753 */
3754 AssertMsg( a_enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3755 && a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3756 ("This shouldn't happen: idxReg=%d enmGstReg=%d a_enmIntendedUse=%s\n",
3757 idxReg, enmGstReg, s_pszIntendedUse[a_enmIntendedUse]));
3758
3759 /** @todo share register for readonly access. */
3760 uint8_t const idxRegNew = a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3761 ? iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff)
3762 : iemNativeRegAllocTmpExInt<a_fRegMask, false>(pReNative, poff);
3763
3764 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3765 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3766
3767 if RT_CONSTEXPR_IF( a_enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3768 && a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3769 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3770 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3771 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[a_enmIntendedUse]));
3772 else
3773 {
3774 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3775 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3776 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3777 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[a_enmIntendedUse]));
3778 }
3779 idxReg = idxRegNew;
3780 }
3781 Assert(RT_BIT_32(idxReg) & a_fRegMask); /* See assumption in fNoVolatileRegs docs. */
3782
3783#ifdef VBOX_STRICT
3784 /* Strict builds: Check that the value is correct. */
3785 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3786#endif
3787
3788#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3789 /** @todo r=aeichner Implement for registers other than GPR as well. */
3790 if RT_CONSTEXPR_IF( a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3791 || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3792 if ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3793 && enmGstReg <= kIemNativeGstReg_GprLast)
3794 || enmGstReg == kIemNativeGstReg_MxCsr)
3795 {
3796# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3797 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3798 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3799# endif
3800 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3801 }
3802#endif
3803
3804 return idxReg;
3805 }
3806
3807 /*
3808 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3809 */
3810 uint8_t const idxRegNew = a_enmIntendedUse != kIemNativeGstRegUse_Calculation
3811 ? iemNativeRegAllocTmpExInt<a_fRegMask, false>(pReNative, poff)
3812 : iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff);
3813
3814 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3815 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3816
3817 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3818 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3819 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3820 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3821
3822#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3823 /** @todo r=aeichner Implement for registers other than GPR as well. */
3824 if RT_CONSTEXPR_IF( a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3825 || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3826 if ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3827 && enmGstReg <= kIemNativeGstReg_GprLast)
3828 || enmGstReg == kIemNativeGstReg_MxCsr)
3829 {
3830# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3831 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3832 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3833# endif
3834 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3835 }
3836#endif
3837
3838 return idxRegNew;
3839}
3840
3841
3842/**
3843 * Allocates a temporary host general purpose register for keeping a guest
3844 * register value.
3845 *
3846 * Since we may already have a register holding the guest register value,
3847 * code will be emitted to do the loading if that's not the case. Code may also
3848 * be emitted if we have to free up a register to satisfy the request.
3849 *
3850 * @returns The host register number; throws VBox status code on failure, so no
3851 * need to check the return value.
3852 * @param pReNative The native recompile state.
3853 * @param poff Pointer to the variable with the code buffer
3854 * position. This will be updated if we need to move a
3855 * variable from register to stack in order to satisfy
3856 * the request.
3857 * @param enmGstReg The guest register that is to be updated.
3858 * @param a_enmIntendedUse How the caller will be using the host register.
3859 * @param a_fNonVolatileRegs Set if no volatile registers are allowed, clear if any
3860 * register is okay (default). The ASSUMPTION here is
3861 * that the caller has already flushed all volatile
3862 * registers, so this is only applied if we allocate a
3863 * new register.
3864 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3865 */
3866template<IEMNATIVEGSTREGUSE const a_enmIntendedUse, bool const a_fNonVolatileRegs>
3867DECL_FORCE_INLINE_THROW(uint8_t)
3868iemNativeRegAllocTmpForGuestRegInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3869{
3870#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3871 AssertMsg( pReNative->idxCurCall == 0
3872 || enmGstReg == kIemNativeGstReg_Pc
3873 || (a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3874 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3875 : a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3876 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3877 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3878 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3879#endif
3880
3881 if RT_CONSTEXPR_IF(!a_fNonVolatileRegs)
3882 return iemNativeRegAllocTmpForGuestRegCommon<a_enmIntendedUse,
3883 IEMNATIVE_HST_GREG_MASK
3884 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, enmGstReg);
3885 else /* keep else, is required by MSC */
3886 return iemNativeRegAllocTmpForGuestRegCommon<a_enmIntendedUse,
3887 IEMNATIVE_HST_GREG_MASK
3888 & ~IEMNATIVE_REG_FIXED_MASK
3889 & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK>(pReNative, poff, enmGstReg);
3890}
3891
3892/* Variants including volatile registers: */
3893
3894DECL_HIDDEN_THROW(uint8_t)
3895iemNativeRegAllocTmpForGuestRegReadOnly(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3896{
3897 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ReadOnly, false>(pReNative, poff, enmGstReg);
3898}
3899
3900DECL_HIDDEN_THROW(uint8_t)
3901iemNativeRegAllocTmpForGuestRegUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3902{
3903 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForUpdate, false>(pReNative, poff, enmGstReg);
3904}
3905
3906DECL_HIDDEN_THROW(uint8_t)
3907iemNativeRegAllocTmpForGuestRegFullWrite(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3908{
3909 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForFullWrite, false>(pReNative, poff, enmGstReg);
3910}
3911
3912DECL_HIDDEN_THROW(uint8_t)
3913iemNativeRegAllocTmpForGuestRegCalculation(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3914{
3915 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_Calculation, false>(pReNative, poff, enmGstReg);
3916}
3917
3918/* Variants excluding any volatile registers: */
3919
3920DECL_HIDDEN_THROW(uint8_t)
3921iemNativeRegAllocTmpForGuestRegReadOnlyNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3922{
3923 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ReadOnly, true>(pReNative, poff, enmGstReg);
3924}
3925
3926DECL_HIDDEN_THROW(uint8_t)
3927iemNativeRegAllocTmpForGuestRegUpdateNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3928{
3929 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForUpdate, true>(pReNative, poff, enmGstReg);
3930}
3931
3932DECL_HIDDEN_THROW(uint8_t)
3933iemNativeRegAllocTmpForGuestRegFullWriteNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3934{
3935 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForFullWrite, true>(pReNative, poff, enmGstReg);
3936}
3937
3938DECL_HIDDEN_THROW(uint8_t)
3939iemNativeRegAllocTmpForGuestRegCalculationNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3940{
3941 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_Calculation, true>(pReNative, poff, enmGstReg);
3942}
3943
3944
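/*
 * Usage sketch (illustrative only, not built): fetch a guest GPR for update.
 * The allocator reuses or loads the shadow copy as needed and, with delayed
 * writeback enabled, marks it dirty so it is flushed later.  The wrapper name
 * is made up and kIemNativeGstReg_GprFirst + 0 simply means "the first guest GPR".
 */
#if 0 /* illustrative example */
static uint32_t iemNativeExampleUpdateGuestGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    IEMNATIVEGSTREG const enmGstReg = (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + 0);
    uint8_t const         idxGprReg = iemNativeRegAllocTmpForGuestRegUpdate(pReNative, &off, enmGstReg);

    /* ... emit code modifying idxGprReg here; the shadow copy tracks the new value ... */

    iemNativeRegFreeTmp(pReNative, idxGprReg);
    return off;
}
#endif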
3945
3946#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
3947/**
3948 * Specialized version of iemNativeRegAllocTmpForGuestReg for EFLAGS.
3949 *
3950 * This takes additional arguments for covering liveness assertions in strict
3951 * builds, it's otherwise the same as iemNativeRegAllocTmpForGuestReg() with
3952 * kIemNativeGstReg_EFlags as argument.
3953 */
3954template<IEMNATIVEGSTREGUSE const a_enmIntendedUse>
3955DECL_FORCE_INLINE_THROW(uint8_t)
3956iemNativeRegAllocTmpForGuestEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t fRead,
3957 uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3958{
3959 if (pReNative->idxCurCall != 0 && (fRead || fWrite /*|| fPotentialCall*/))
3960 {
3961 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3962 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3963 Assert(!(fPotentialCall & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3964 uint64_t const fAll = fRead | fWrite /*| fPotentialCall*/;
3965 uint32_t fState;
3966# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
3967 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
3968 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
3969 || ( fRead & RT_BIT_64(a_enmGstEfl) \
3970 ? fWrite & RT_BIT_64(a_enmGstEfl) \
3971 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
3972 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
3973 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
3974 ) \
3975 , ("%s - %u\n", #a_enmGstEfl, fState))
3976 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
3977 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
3978 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
3979 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
3980 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
3981 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
3982 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
3983# undef MY_ASSERT_ONE_EFL
3984 }
3985 RT_NOREF(fPotentialCall);
3986
3987 AssertCompile(a_enmIntendedUse == kIemNativeGstRegUse_ReadOnly || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate);
3988 if RT_CONSTEXPR_IF(a_enmIntendedUse == kIemNativeGstRegUse_ReadOnly)
3989 return iemNativeRegAllocTmpForGuestRegCommon<kIemNativeGstRegUse_ReadOnly,
3990 IEMNATIVE_CALL_VOLATILE_GREG_MASK
3991 & IEMNATIVE_HST_GREG_MASK
3992 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, kIemNativeGstReg_EFlags);
3993 else /* keep else, is required by MSC */
3994 return iemNativeRegAllocTmpForGuestRegCommon<kIemNativeGstRegUse_ForUpdate,
3995 IEMNATIVE_CALL_VOLATILE_GREG_MASK
3996 & IEMNATIVE_HST_GREG_MASK
3997 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, kIemNativeGstReg_EFlags);
3998}
3999
4000
4001DECL_HIDDEN_THROW(uint8_t)
4002iemNativeRegAllocTmpForGuestEFlagsReadOnly(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
4003 uint64_t fRead, uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
4004{
4005 return iemNativeRegAllocTmpForGuestEFlags<kIemNativeGstRegUse_ReadOnly>(pReNative, poff, fRead, fWrite, fPotentialCall);
4006}
4007
4008DECL_HIDDEN_THROW(uint8_t)
4009iemNativeRegAllocTmpForGuestEFlagsForUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t fRead,
4010 uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
4011{
4012 return iemNativeRegAllocTmpForGuestEFlags<kIemNativeGstRegUse_ForUpdate>(pReNative, poff, fRead, fWrite, fPotentialCall);
4013}
4014
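/*
 * Usage sketch (illustrative only, not built): grab EFLAGS for update when an
 * instruction both reads and writes the carry flag.  The fRead/fWrite masks
 * only feed the liveness assertions above; the wrapper name is made up.
 */
#if 0 /* illustrative example */
static uint32_t iemNativeExampleUpdateCarryFlag(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
                                                                          RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) /*fRead*/,
                                                                          RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) /*fWrite*/,
                                                                          0 /*fPotentialCall*/);

    /* ... emit code updating the carry bit in idxEflReg here ... */

    iemNativeRegFreeTmp(pReNative, idxEflReg);
    return off;
}
#endif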
4015#endif
4016
4017
4018
4019/**
4020 * Common worker for iemNativeRegAllocTmpForGuestRegIfAlreadyPresent and
4021 * iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent.
4022 *
4023 * See iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() for details.
4024 */
4025DECL_FORCE_INLINE(uint8_t)
4026iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4027{
4028 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4029
4030 /*
4031 * First check if the guest register value is already in a host register.
4032 */
4033 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4034 {
4035 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4036 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4037 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4038 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4039
4040 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4041 {
4042 /*
4043 * We only do readonly use here, so easy compared to the other
4044 * variant of this code.
4045 */
4046 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4047 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4048 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4049 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4050 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4051
4052#ifdef VBOX_STRICT
4053 /* Strict builds: Check that the value is correct. */
4054 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4055#else
4056 RT_NOREF(poff);
4057#endif
4058 return idxReg;
4059 }
4060 }
4061
4062 return UINT8_MAX;
4063}
4064
4065
4066/**
4067 * Allocates a temporary host general purpose register that already holds the
4068 * given guest register value.
4069 *
4070 * The use case for this function is places where the shadowing state cannot be
4071 * modified due to branching and such. This will fail if we don't have a
4072 * current shadow copy handy or if it's incompatible. The only code that will
4073 * be emitted here is value checking code in strict builds.
4074 *
4075 * The intended use can only be readonly!
4076 *
4077 * @returns The host register number, UINT8_MAX if not present.
4078 * @param pReNative The native recompile state.
4079 * @param poff Pointer to the instruction buffer offset.
4080 * Will be updated in strict builds if a register is
4081 * found.
4082 * @param enmGstReg The guest register that is to be used (read-only).
4083 * @note In strict builds, this may throw instruction buffer growth failures.
4084 * Non-strict builds will not throw anything.
4085 * @sa iemNativeRegAllocTmpForGuestReg
4086 */
4087DECL_HIDDEN_THROW(uint8_t)
4088iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4089{
4090#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4091 AssertMsg( pReNative->idxCurCall == 0
4092 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4093 || enmGstReg == kIemNativeGstReg_Pc
4094 , ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4095#endif
4096 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, enmGstReg);
4097}
4098
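/*
 * Usage sketch (illustrative only, not built): code that must not disturb the
 * shadowing state (e.g. on a branch path) first asks for an existing shadow
 * copy and falls back to a plain load when none is available.  The wrapper
 * name is made up.
 */
#if 0 /* illustrative example */
static uint32_t iemNativeExamplePeekAtPc(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg == UINT8_MAX)
    {
        /* No current shadow copy; load the value without touching the shadowing state. */
        idxPcReg = iemNativeRegAllocTmp(pReNative, &off);
        off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxPcReg, kIemNativeGstReg_Pc);
    }

    /* ... emit code reading idxPcReg here ... */

    iemNativeRegFreeTmp(pReNative, idxPcReg);
    return off;
}
#endif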
4099
4100#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
4101/**
4102 * Specialized version of iemNativeRegAllocTmpForGuestRegIfAlreadyPresent for
4103 * EFLAGS.
4104 *
4105 * This takes additional arguments for covering liveness assertions in strict
4106 * builds, it's otherwise the same as
4107 * iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() with
4108 * kIemNativeGstReg_EFlags as argument.
4109 *
4110 * @note The @a fWrite parameter is necessary to complete the liveness picture,
4111 * as iemNativeEmitFetchEFlags() may fetch flags in prep for a later
4112 * commit. If the operation clobbers all the flags, @a fRead will be
4113 * zero, so better verify the whole picture while we're here.
4114 */
4115DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
4116 uint64_t fRead, uint64_t fWrite /*=0*/)
4117{
4118 if (pReNative->idxCurCall != 0)
4119 {
4120 Assert(fRead | fWrite);
4121 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
4122 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
4123 uint64_t const fAll = fRead | fWrite;
4124 uint32_t fState;
4125# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
4126 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
4127 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
4128 || ( fRead & RT_BIT_64(a_enmGstEfl) \
4129 ? fWrite & RT_BIT_64(a_enmGstEfl) \
4130 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
4131 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
4132 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
4133 ) \
4134 , ("%s - %u\n", #a_enmGstEfl, fState))
4135 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
4136 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
4137 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
4138 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
4139 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
4140 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
4141 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
4142# undef MY_ASSERT_ONE_EFL
4143 }
4144 RT_NOREF(fRead);
4145 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, kIemNativeGstReg_EFlags);
4146}
4147#endif
4148
4149
4150/**
4151 * Allocates argument registers for a function call.
4152 *
4153 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4154 * need to check the return value.
4155 * @param pReNative The native recompile state.
4156 * @param off The current code buffer offset.
4157 * @param cArgs The number of arguments the function call takes.
4158 */
4159DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4160{
4161 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4162 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4163 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4164 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4165
4166 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4167 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4168 else if (cArgs == 0)
4169 return off;
4170
4171 /*
4172 * Do we get lucky and all registers are free and not shadowing anything?
4173 */
4174 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4175 for (uint32_t i = 0; i < cArgs; i++)
4176 {
4177 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4178 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4179 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4180 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4181 }
4182 /*
4183 * Okay, not lucky so we have to free up the registers.
4184 */
4185 else
4186 for (uint32_t i = 0; i < cArgs; i++)
4187 {
4188 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4189 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4190 {
4191 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4192 {
4193 case kIemNativeWhat_Var:
4194 {
4195 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4196 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4197 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4198 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4199 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4200#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4201 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4202#endif
4203
4204 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4205 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4206 else
4207 {
4208 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4209 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4210 }
4211 break;
4212 }
4213
4214 case kIemNativeWhat_Tmp:
4215 case kIemNativeWhat_Arg:
4216 case kIemNativeWhat_rc:
4217 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4218 default:
4219 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4220 }
4221
4222 }
4223 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4224 {
4225 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4226 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4227 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4228#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4229 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4230#endif
4231 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4232 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4233 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4234 }
4235 else
4236 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4237 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4238 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4239 }
4240 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4241 return off;
4242}
4243
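/*
 * Usage sketch (illustrative only, not built): reserve the argument registers
 * before loading call parameters.  The wrapper name and the constant are made
 * up; g_aidxIemNativeCallRegs maps argument slots to host registers as above.
 */
#if 0 /* illustrative example */
static uint32_t iemNativeExamplePrepTwoArgCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Free up and reserve the first two argument registers. */
    off = iemNativeRegAllocArgs(pReNative, off, 2);

    /* Load the second argument; loading the first one and emitting the actual call are elided. */
    off = iemNativeEmitLoadGprImm64(pReNative, off, g_aidxIemNativeCallRegs[1], 42 /* made-up value */);

    return off;
}
#endif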
4244
4245DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4246
4247
4248#if 0
4249/**
4250 * Frees a register assignment of any type.
4251 *
4252 * @param pReNative The native recompile state.
4253 * @param idxHstReg The register to free.
4254 *
4255 * @note Does not update variables.
4256 */
4257DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4258{
4259 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4260 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4261 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4262 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4263 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4264 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4265 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4266 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4267 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4268 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4269 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4270 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4271 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4272 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4273
4274 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4275 /* no flushing, right:
4276 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4277 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4278 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4279 */
4280}
4281#endif
4282
4283
4284/**
4285 * Frees a temporary register.
4286 *
4287 * Any shadow copies of guest registers assigned to the host register will not
4288 * be flushed by this operation.
4289 */
4290DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4291{
4292 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4293 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4294 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4295 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4296 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4297}
4298
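/*
 * Illustrative usage sketch (not compiled, not from the original source): the
 * typical pairing of a temporary register allocation with iemNativeRegFreeTmp.
 * The wrapper function name is a placeholder and the exact iemNativeRegAllocTmp
 * parameter list shown here is an assumption.
 */
#if 0
static uint32_t iemNativeExampleUseTmpReg(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off); /* assumed signature */
    /* ... emit code that uses idxTmpReg as scratch ... */
    iemNativeRegFreeTmp(pReNative, idxTmpReg); /* does not flush any guest shadows */
    return off;
}
#endif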
4299
4300/**
4301 * Frees a temporary immediate register.
4302 *
4303 * It is assumed that the caller has not modified the register, so it still holds
4304 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4305 */
4306DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4307{
4308 iemNativeRegFreeTmp(pReNative, idxHstReg);
4309}
4310
4311
4312/**
4313 * Frees a register assigned to a variable.
4314 *
4315 * The register will be disassociated from the variable.
4316 */
4317DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4318{
4319 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4320 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4321 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4322 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4323 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4324#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4325 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4326#endif
4327
4328 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4329 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4330 if (!fFlushShadows)
4331 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4332 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4333 else
4334 {
4335 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4336 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4337#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4338 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
4339#endif
4340 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4341 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4342 uint64_t fGstRegShadows = fGstRegShadowsOld;
4343 while (fGstRegShadows)
4344 {
4345 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4346 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4347
4348 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4349 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4350 }
4351 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4352 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4353 }
4354}
4355
4356
4357#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4358# if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
4359/** Host CPU SIMD register names. */
4360DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4361{
4362# ifdef RT_ARCH_AMD64
4363 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4364# elif defined(RT_ARCH_ARM64)
4365 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4366 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4367# else
4368# error "port me"
4369# endif
4370};
4371# endif
4372
4373
4374/**
4375 * Frees a SIMD register assigned to a variable.
4376 *
4377 * The register will be disassociated from the variable.
4378 */
4379DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4380{
4381 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4382 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4383 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4384 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4385 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4386 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4387
4388 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4389 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4390 if (!fFlushShadows)
4391 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4392 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4393 else
4394 {
4395 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4396 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4397 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4398 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4399 uint64_t fGstRegShadows = fGstRegShadowsOld;
4400 while (fGstRegShadows)
4401 {
4402 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4403 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4404
4405 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4406 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4407 }
4408 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4409 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4410 }
4411}
4412
4413
4414/**
4415 * Reassigns a variable to a different SIMD register specified by the caller.
4416 *
4417 * @returns The new code buffer position.
4418 * @param pReNative The native recompile state.
4419 * @param off The current code buffer position.
4420 * @param idxVar The variable index.
4421 * @param idxRegOld The old host register number.
4422 * @param idxRegNew The new host register number.
4423 * @param pszCaller The caller for logging.
4424 */
4425static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4426 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4427{
4428 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4429 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4430 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4431 RT_NOREF(pszCaller);
4432
4433 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4434 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4435 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4436
4437 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4438 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4439 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4440
4441 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4442 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4444
4445 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4446 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4447 else
4448 {
4449 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4450 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4451 }
4452
4453 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4454 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4455 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
4456 if (fGstRegShadows)
4457 {
4458 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4459 | RT_BIT_32(idxRegNew);
4460 while (fGstRegShadows)
4461 {
4462 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4463 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4464
4465 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4466 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4467 }
4468 }
4469
4470 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4471 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4472 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4473 return off;
4474}
4475
4476
4477/**
4478 * Moves a variable to a different register or spills it onto the stack.
4479 *
4480 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4481 * kinds can easily be recreated if needed later.
4482 *
4483 * @returns The new code buffer position.
4484 * @param pReNative The native recompile state.
4485 * @param off The current code buffer position.
4486 * @param idxVar The variable index.
4487 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4488 * call-volatile registers.
4489 */
4490DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4491 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4492{
4493 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4494 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4495 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4496 Assert(!pVar->fRegAcquired);
4497 Assert(!pVar->fSimdReg);
4498
4499 uint8_t const idxRegOld = pVar->idxReg;
4500 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4501 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4502 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4503 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4504 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4505 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4506 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4507 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4508 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4509 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4510
4511 /** @todo Add statistics on this.*/
4512 /** @todo Implement basic variable liveness analysis (python) so variables
4513     * can be freed immediately once they are no longer used, as otherwise we
4514     * risk trashing registers and stack space for dead variables.
4515 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4516
4517 /*
4518 * First try move it to a different register, as that's cheaper.
4519 */
4520 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4521 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4522 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4523 if (fRegs)
4524 {
4525 /* Avoid using shadow registers, if possible. */
4526 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4527 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4528 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4529 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4530 }
4531
4532 /*
4533 * Otherwise we must spill the register onto the stack.
4534 */
4535 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4536 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4537 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4538
4539 if (pVar->cbVar == sizeof(RTUINT128U))
4540 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4541 else
4542 {
4543 Assert(pVar->cbVar == sizeof(RTUINT256U));
4544 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4545 }
4546
4547 pVar->idxReg = UINT8_MAX;
4548 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4549 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4550 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4551 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4552 return off;
4553}
4554
4555
4556/**
4557 * Called right before emitting a call instruction to move anything important
4558 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
4559 * optionally freeing argument variables.
4560 *
4561 * @returns New code buffer offset, UINT32_MAX on failure.
4562 * @param pReNative The native recompile state.
4563 * @param off The code buffer offset.
4564 * @param cArgs The number of arguments the function call takes.
4565 *                      It is presumed that the host register part of these has
4566 * been allocated as such already and won't need moving,
4567 * just freeing.
4568 * @param fKeepVars Mask of variables that should keep their register
4569 * assignments. Caller must take care to handle these.
4570 */
4571DECL_HIDDEN_THROW(uint32_t)
4572iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4573{
4574 Assert(!cArgs); RT_NOREF(cArgs);
4575
4576 /* fKeepVars will reduce this mask. */
4577 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4578
4579 /*
4580 * Move anything important out of volatile registers.
4581 */
4582 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4583#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4584 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4585#endif
4586 ;
4587
4588 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4589 if (!fSimdRegsToMove)
4590 { /* likely */ }
4591 else
4592 {
4593 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4594 while (fSimdRegsToMove != 0)
4595 {
4596 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4597 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4598
4599 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4600 {
4601 case kIemNativeWhat_Var:
4602 {
4603                    uint8_t const       idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4604 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4605 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4606 Assert(pVar->idxReg == idxSimdReg);
4607 Assert(pVar->fSimdReg);
4608 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4609 {
4610 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4611 idxVar, pVar->enmKind, pVar->idxReg));
4612 if (pVar->enmKind != kIemNativeVarKind_Stack)
4613 pVar->idxReg = UINT8_MAX;
4614 else
4615 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4616 }
4617 else
4618 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4619 continue;
4620 }
4621
4622 case kIemNativeWhat_Arg:
4623 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4624 continue;
4625
4626 case kIemNativeWhat_rc:
4627 case kIemNativeWhat_Tmp:
4628 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4629 continue;
4630
4631 case kIemNativeWhat_FixedReserved:
4632#ifdef RT_ARCH_ARM64
4633 continue; /* On ARM the upper half of the virtual 256-bit register. */
4634#endif
4635
4636 case kIemNativeWhat_FixedTmp:
4637 case kIemNativeWhat_pVCpuFixed:
4638 case kIemNativeWhat_pCtxFixed:
4639 case kIemNativeWhat_PcShadow:
4640 case kIemNativeWhat_Invalid:
4641 case kIemNativeWhat_End:
4642 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4643 }
4644 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4645 }
4646 }
4647
4648 /*
4649 * Do the actual freeing.
4650 */
4651 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4652 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4653 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4654 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4655
4656 /* If there are guest register shadows in any call-volatile register, we
4657       have to clear the corresponding guest register masks for each register. */
4658 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4659 if (fHstSimdRegsWithGstShadow)
4660 {
4661 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4662 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4663 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4664 do
4665 {
4666 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4667 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4668
4669 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4670
4671#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4672 /*
4673 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4674 * to call volatile registers).
4675 */
4676 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4677 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4678 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4679#endif
4680 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4681 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4682
4683 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4684 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4685 } while (fHstSimdRegsWithGstShadow != 0);
4686 }
4687
4688 return off;
4689}
4690#endif
4691
4692
4693/**
4694 * Called right before emitting a call instruction to move anything important
4695 * out of call-volatile registers, free and flush the call-volatile registers,
4696 * optionally freeing argument variables.
4697 *
4698 * @returns New code buffer offset, UINT32_MAX on failure.
4699 * @param pReNative The native recompile state.
4700 * @param off The code buffer offset.
4701 * @param cArgs The number of arguments the function call takes.
4702 *                      It is presumed that the host register part of these has
4703 * been allocated as such already and won't need moving,
4704 * just freeing.
4705 * @param fKeepVars Mask of variables that should keep their register
4706 * assignments. Caller must take care to handle these.
4707 */
4708DECL_HIDDEN_THROW(uint32_t)
4709iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4710{
4711 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4712
4713 /* fKeepVars will reduce this mask. */
4714 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK;
4715
4716#ifdef RT_ARCH_ARM64
4717AssertCompile(IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK == UINT32_C(0x37fff));
4718#endif
4719
4720 /*
4721 * Move anything important out of volatile registers.
4722 */
4723 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4724 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4725 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK
4726#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4727 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4728#endif
4729 & ~g_afIemNativeCallRegs[cArgs];
4730
4731 fRegsToMove &= pReNative->Core.bmHstRegs;
4732 if (!fRegsToMove)
4733 { /* likely */ }
4734 else
4735 {
4736 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4737 while (fRegsToMove != 0)
4738 {
4739 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4740 fRegsToMove &= ~RT_BIT_32(idxReg);
4741
4742 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4743 {
4744 case kIemNativeWhat_Var:
4745 {
4746 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4747 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4748 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4749 Assert(pVar->idxReg == idxReg);
4750#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4751 Assert(!pVar->fSimdReg);
4752#endif
4753 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4754 {
4755 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4756 idxVar, pVar->enmKind, pVar->idxReg));
4757 if (pVar->enmKind != kIemNativeVarKind_Stack)
4758 pVar->idxReg = UINT8_MAX;
4759 else
4760 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4761 }
4762 else
4763 fRegsToFree &= ~RT_BIT_32(idxReg);
4764 continue;
4765 }
4766
4767 case kIemNativeWhat_Arg:
4768 AssertMsgFailed(("What?!?: %u\n", idxReg));
4769 continue;
4770
4771 case kIemNativeWhat_rc:
4772 case kIemNativeWhat_Tmp:
4773 AssertMsgFailed(("Missing free: %u\n", idxReg));
4774 continue;
4775
4776 case kIemNativeWhat_FixedTmp:
4777 case kIemNativeWhat_pVCpuFixed:
4778 case kIemNativeWhat_pCtxFixed:
4779 case kIemNativeWhat_PcShadow:
4780 case kIemNativeWhat_FixedReserved:
4781 case kIemNativeWhat_Invalid:
4782 case kIemNativeWhat_End:
4783 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4784 }
4785 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4786 }
4787 }
4788
4789 /*
4790 * Do the actual freeing.
4791 */
4792 if (pReNative->Core.bmHstRegs & fRegsToFree)
4793 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4794 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4795 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4796
4797 /* If there are guest register shadows in any call-volatile register, we
4798       have to clear the corresponding guest register masks for each register. */
4799 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4800 if (fHstRegsWithGstShadow)
4801 {
4802 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4803 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
4804 fHstRegsWithGstShadow));
4805 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4806 do
4807 {
4808 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4809 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4810
4811 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4812
4813#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4814 /*
4815 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4816 * to call volatile registers).
4817 */
4818 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4819 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4820 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4821#endif
4822
4823 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4824 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4825 } while (fHstRegsWithGstShadow != 0);
4826 }
4827
4828#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4829 /* Now for the SIMD registers, no argument support for now. */
4830 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4831#endif
4832
4833 return off;
4834}
4835
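/*
 * Illustrative call-emission sketch (not compiled, not from the original
 * source): how the function above is meant to bracket a helper call together
 * with the shadow-restore helper defined further below.  The call-emitting
 * step in the middle is elided and the wrapper function name is a placeholder.
 */
#if 0
static uint32_t iemNativeExampleEmitHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Move/spill variables out of call-volatile registers and flush their shadows. */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, 0 /*fKeepVars*/);
    /* ... load arguments and emit the actual call here ... */
    /* Reload guest shadows that lived in call-volatile registers, if desired. */
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
    return off;
}
#endif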
4836
4837/**
4838 * Flushes a set of guest register shadow copies.
4839 *
4840 * This is usually done after calling a threaded function or a C-implementation
4841 * of an instruction.
4842 *
4843 * @param pReNative The native recompile state.
4844 * @param fGstRegs Set of guest registers to flush.
4845 */
4846DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4847{
4848 /*
4849 * Reduce the mask by what's currently shadowed
4850 */
4851 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4852 fGstRegs &= bmGstRegShadowsOld;
4853 if (fGstRegs)
4854 {
4855 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4856 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4857 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4858 if (bmGstRegShadowsNew)
4859 {
4860 /*
4861 * Partial.
4862 */
4863 do
4864 {
4865 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4866 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4867 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4868 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4869 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4870#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4871 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4872#endif
4873
4874 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4875 fGstRegs &= ~fInThisHstReg;
4876 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4877 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4878 if (!fGstRegShadowsNew)
4879 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4880 } while (fGstRegs != 0);
4881 }
4882 else
4883 {
4884 /*
4885 * Clear all.
4886 */
4887 do
4888 {
4889 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4890 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4891 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4892 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4893 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4894#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4895 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4896#endif
4897
4898 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4899 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4900 } while (fGstRegs != 0);
4901 pReNative->Core.bmHstRegsWithGstShadow = 0;
4902 }
4903 }
4904}
4905
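/*
 * Illustrative sketch (not compiled, not from the original source): dropping
 * all guest register shadows after a call whose side effects on the guest
 * state are unknown.  The all-registers mask is derived from
 * kIemNativeGstReg_End; the surrounding context is assumed.
 */
#if 0
    /* Everything below kIemNativeGstReg_End may have been modified by the callee. */
    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_End) - 1);
#endif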
4906
4907/**
4908 * Flushes guest register shadow copies held by a set of host registers.
4909 *
4910 * This is used with the TLB lookup code for ensuring that we don't carry on
4911 * with any guest shadows in volatile registers, as these will get corrupted by
4912 * a TLB miss.
4913 *
4914 * @param pReNative The native recompile state.
4915 * @param fHstRegs Set of host registers to flush guest shadows for.
4916 */
4917DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4918{
4919 /*
4920 * Reduce the mask by what's currently shadowed.
4921 */
4922 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4923 fHstRegs &= bmHstRegsWithGstShadowOld;
4924 if (fHstRegs)
4925 {
4926 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4927 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4928 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4929 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4930 if (bmHstRegsWithGstShadowNew)
4931 {
4932 /*
4933 * Partial (likely).
4934 */
4935 uint64_t fGstShadows = 0;
4936 do
4937 {
4938 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4939 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4940 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4941 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4942#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4943 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4944#endif
4945
4946 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4947 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4948 fHstRegs &= ~RT_BIT_32(idxHstReg);
4949 } while (fHstRegs != 0);
4950 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4951 }
4952 else
4953 {
4954 /*
4955 * Clear all.
4956 */
4957 do
4958 {
4959 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4960 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4961 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4962 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4963#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4964 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4965#endif
4966
4967 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4968 fHstRegs &= ~RT_BIT_32(idxHstReg);
4969 } while (fHstRegs != 0);
4970 pReNative->Core.bmGstRegShadows = 0;
4971 }
4972 }
4973}
4974
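/*
 * Illustrative sketch (not compiled, not from the original source): as the
 * comment above describes, the TLB lookup code drops guest shadows from the
 * call-volatile registers so a TLB miss helper call cannot leave stale
 * associations behind.
 */
#if 0
    iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
#endif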
4975
4976/**
4977 * Restores guest shadow copies in volatile registers.
4978 *
4979 * This is used after calling a helper function (think TLB miss) to restore the
4980 * register state of volatile registers.
4981 *
4982 * @param pReNative The native recompile state.
4983 * @param off The code buffer offset.
4984 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4985 * be active (allocated) w/o asserting. Hack.
4986 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4987 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4988 */
4989DECL_HIDDEN_THROW(uint32_t)
4990iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4991{
4992 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4993 if (fHstRegs)
4994 {
4995 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4996 do
4997 {
4998 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4999
5000            /* It's not fatal if a register is active holding a variable that is
5001               shadowing a guest register, ASSUMING all pending guest register
5002               writes were flushed prior to the helper call. However, we'll be
5003               emitting duplicate restores, so it wastes code space. */
5004 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5005 RT_NOREF(fHstRegsActiveShadows);
5006
5007 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5008#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5009 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
5010#endif
5011 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5012 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5013 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5014
5015 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5016 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5017
5018 fHstRegs &= ~RT_BIT_32(idxHstReg);
5019 } while (fHstRegs != 0);
5020 }
5021 return off;
5022}
5023
5024
5025
5026
5027/*********************************************************************************************************************************
5028* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5029*********************************************************************************************************************************/
5030#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5031
5032/**
5033 * Info about shadowed guest SIMD register values.
5034 * @see IEMNATIVEGSTSIMDREG
5035 */
5036static struct
5037{
5038 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5039 uint32_t offXmm;
5040 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5041 uint32_t offYmm;
5042 /** Name (for logging). */
5043 const char *pszName;
5044} const g_aGstSimdShadowInfo[] =
5045{
5046#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5047 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5048 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5049 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5050 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5051 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5052 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5053 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5054 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5055 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5056 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5057 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5058 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5059 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5060 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5061 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5062 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5063 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5064#undef CPUMCTX_OFF_AND_SIZE
5065};
5066AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5067
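/*
 * Illustrative sketch (not compiled, not from the original source): how the
 * table above is consumed when spilling a shadowed SIMD register back into
 * CPUMCTX (see iemNativeSimdRegFlushPendingWrite below).  Register 3 is an
 * arbitrary example; the local variable names are placeholders.
 */
#if 0
    uint32_t const offXmm3   = g_aGstSimdShadowInfo[kIemNativeGstSimdReg_SimdRegFirst + 3].offXmm; /* low 128 bits */
    uint32_t const offYmmHi3 = g_aGstSimdShadowInfo[kIemNativeGstSimdReg_SimdRegFirst + 3].offYmm; /* high 128 bits */
#endif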
5068
5069/**
5070 * Frees a temporary SIMD register.
5071 *
5072 * Any shadow copies of guest registers assigned to the host register will not
5073 * be flushed by this operation.
5074 */
5075DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5076{
5077 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5078 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5079 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5080 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5081 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5082}
5083
5084
5085/**
5086 * Emits code to flush a pending write of the given SIMD register, if any; this also flushes the guest to host SIMD register association.
5087 *
5088 * @returns New code buffer offset.
5089 * @param pReNative The native recompile state.
5090 * @param off Current code buffer position.
5091 * @param enmGstSimdReg The guest SIMD register to flush.
5092 */
5093DECL_HIDDEN_THROW(uint32_t)
5094iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5095{
5096 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5097
5098 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5099 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5100 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5101 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5102
5103 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5104 {
5105 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5106 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5107 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5108 }
5109
5110 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5111 {
5112 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5113 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5114 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5115 }
5116
5117 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5118 return off;
5119}
5120
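/*
 * Illustrative sketch (not compiled, not from the original source): forcing a
 * single dirty guest SIMD register (ymm0 here, arbitrarily) back into CPUMCTX
 * before emitting code that reads the context memory directly.
 */
#if 0
    off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0));
#endif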
5121
5122/**
5123 * Flush the given set of guest SIMD registers if marked as dirty.
5124 *
5125 * @returns New code buffer offset.
5126 * @param pReNative The native recompile state.
5127 * @param off Current code buffer position.
5128 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
5129 */
5130DECL_HIDDEN_THROW(uint32_t)
5131iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
5132{
5133 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5134 & fFlushGstSimdReg;
5135 if (bmGstSimdRegShadowDirty)
5136 {
5137# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5138 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5139 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5140# endif
5141
5142 do
5143 {
5144 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5145 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5146 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5147 } while (bmGstSimdRegShadowDirty);
5148 }
5149
5150 return off;
5151}
5152
5153
5154#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5155/**
5156 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
5157 *
5158 * @returns New code buffer offset.
5159 * @param pReNative The native recompile state.
5160 * @param off Current code buffer position.
5161 * @param idxHstSimdReg The host SIMD register.
5162 *
5163 * @note This doesn't do any unshadowing of guest registers from the host register.
5164 */
5165DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
5166{
5167 /* We need to flush any pending guest register writes this host register shadows. */
5168 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5169 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5170 if (bmGstSimdRegShadowDirty)
5171 {
5172# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5173 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5174 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5175# endif
5176
5177 do
5178 {
5179 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5180 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5181 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5182 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5183 } while (bmGstSimdRegShadowDirty);
5184 }
5185
5186 return off;
5187}
5188#endif
5189
5190
5191/**
5192 * Locate a register, possibly freeing one up.
5193 *
5194 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5195 * failed.
5196 *
5197 * @returns Host register number on success. Returns UINT8_MAX if no register
5198 *          was found; the caller is supposed to deal with this and raise an
5199 *          allocation type specific status code (if desired).
5200 *
5201 * @throws  VBox status code if we run into trouble spilling a variable or
5202 *          recording debug info. Does NOT throw anything if we're out of
5203 *          registers, though.
5204 */
5205static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5206 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5207{
5208 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5209 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5210 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5211
5212 /*
5213 * Try a freed register that's shadowing a guest register.
5214 */
5215 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5216 if (fRegs)
5217 {
5218 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5219
5220#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5221 /*
5222         * When we have liveness information, we use it to kick out all shadowed
5223         * guest registers that will not be needed any more in this TB. If we're
5224 * lucky, this may prevent us from ending up here again.
5225 *
5226 * Note! We must consider the previous entry here so we don't free
5227 * anything that the current threaded function requires (current
5228 * entry is produced by the next threaded function).
5229 */
5230 uint32_t const idxCurCall = pReNative->idxCurCall;
5231 if (idxCurCall > 0)
5232 {
5233 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5234 uint64_t const fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
5235
5236 /* If it matches any shadowed registers. */
5237 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5238 {
5239 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5240 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5241 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5242
5243 /* See if we've got any unshadowed registers we can return now. */
5244 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5245 if (fUnshadowedRegs)
5246 {
5247 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
5248 return (fPreferVolatile
5249 ? ASMBitFirstSetU32(fUnshadowedRegs)
5250 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5251 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5252 - 1;
5253 }
5254 }
5255 }
5256#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5257
5258 unsigned const idxReg = (fPreferVolatile
5259 ? ASMBitFirstSetU32(fRegs)
5260 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5261 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5262 - 1;
5263
5264 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5265 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5266 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5267 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5268
5269 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5270 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
5271
5272 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5273 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5274 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5275 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5276 return idxReg;
5277 }
5278
5279 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5280
5281 /*
5282 * Try free up a variable that's in a register.
5283 *
5284 * We do two rounds here, first evacuating variables we don't need to be
5285 * saved on the stack, then in the second round move things to the stack.
5286 */
5287 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5288 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5289 {
5290 uint32_t fVars = pReNative->Core.bmVars;
5291 while (fVars)
5292 {
5293 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5294 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5295 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
5296 continue;
5297
5298 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5299 && (RT_BIT_32(idxReg) & fRegMask)
5300 && ( iLoop == 0
5301 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5302 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5303 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5304 {
5305 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5306 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5307 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5308 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5309 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5310 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5311
5312 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5313 {
5314 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5315 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5316 }
5317
5318 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5319 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5320
5321 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5322                pReNative->Core.bmGstSimdRegShadows        &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5323 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5324 return idxReg;
5325 }
5326 fVars &= ~RT_BIT_32(idxVar);
5327 }
5328 }
5329
5330 AssertFailed();
5331 return UINT8_MAX;
5332}
5333
5334
5335/**
5336 * Flushes a set of guest register shadow copies.
5337 *
5338 * This is usually done after calling a threaded function or a C-implementation
5339 * of an instruction.
5340 *
5341 * @param pReNative The native recompile state.
5342 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5343 */
5344DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5345{
5346 /*
5347 * Reduce the mask by what's currently shadowed
5348 */
5349 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5350 fGstSimdRegs &= bmGstSimdRegShadows;
5351 if (fGstSimdRegs)
5352 {
5353 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5354 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5355 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5356 if (bmGstSimdRegShadowsNew)
5357 {
5358 /*
5359 * Partial.
5360 */
5361 do
5362 {
5363 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5364 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5365 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5366 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5367 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5368 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5369
5370 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5371 fGstSimdRegs &= ~fInThisHstReg;
5372 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5373 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5374 if (!fGstRegShadowsNew)
5375 {
5376 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5377 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5378 }
5379 } while (fGstSimdRegs != 0);
5380 }
5381 else
5382 {
5383 /*
5384 * Clear all.
5385 */
5386 do
5387 {
5388 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5389 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5390 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5391 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5392 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5393 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5394
5395 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5396 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5397 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5398 } while (fGstSimdRegs != 0);
5399 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5400 }
5401 }
5402}
5403
5404
5405/**
5406 * Allocates a temporary host SIMD register.
5407 *
5408 * This may emit code to save register content onto the stack in order to free
5409 * up a register.
5410 *
5411 * @returns The host register number; throws VBox status code on failure,
5412 * so no need to check the return value.
5413 * @param pReNative The native recompile state.
5414 * @param poff Pointer to the variable with the code buffer position.
5415 *                      This will be updated if we need to move a variable from
5416 * register to stack in order to satisfy the request.
5417 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5418 * registers (@c true, default) or the other way around
5419 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5420 */
5421DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5422{
5423 /*
5424 * Try find a completely unused register, preferably a call-volatile one.
5425 */
5426 uint8_t idxSimdReg;
5427    uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5428                   & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5429 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5430 if (fRegs)
5431 {
5432 if (fPreferVolatile)
5433 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5434 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5435 else
5436 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5437 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5438 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5439 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5440
5441 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5442 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5443 }
5444 else
5445 {
5446 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5447 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5448 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5449 }
5450
5451 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5452 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5453}
5454
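/*
 * Illustrative usage sketch (not compiled, not from the original source):
 * allocating and releasing a temporary host SIMD register.  The wrapper
 * function name and the elided emit code are placeholders.
 */
#if 0
static uint32_t iemNativeExampleUseTmpSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off, true /*fPreferVolatile*/);
    /* ... emit SIMD code using idxSimdTmp as scratch; enmLoaded starts out Invalid ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp); /* does not flush any guest shadows */
    return off;
}
#endif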
5455
5456/**
5457 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5458 * registers.
5459 *
5460 * @returns The host register number; throws VBox status code on failure,
5461 * so no need to check the return value.
5462 * @param pReNative The native recompile state.
5463 * @param poff Pointer to the variable with the code buffer position.
5464 *                      This will be updated if we need to move a variable from
5465 * register to stack in order to satisfy the request.
5466 * @param fRegMask Mask of acceptable registers.
5467 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5468 * registers (@c true, default) or the other way around
5469 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5470 */
5471DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5472 bool fPreferVolatile /*= true*/)
5473{
5474 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5475 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5476
5477 /*
5478 * Try find a completely unused register, preferably a call-volatile one.
5479 */
5480 uint8_t idxSimdReg;
5481 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5482 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5483 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5484 & fRegMask;
5485 if (fRegs)
5486 {
5487 if (fPreferVolatile)
5488 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5489 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5490 else
5491 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5492 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5493 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5494 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5495
5496 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5497 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5498 }
5499 else
5500 {
5501 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5502 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5503 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5504 }
5505
5506 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5507 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5508}
5509
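/*
 * Illustrative sketch (not compiled, not from the original source): using the
 * mask parameter to restrict the allocation to call-preserved (non-volatile)
 * SIMD registers.  The surrounding context is assumed.
 */
#if 0
    uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmpEx(pReNative, &off,
                                                          IEMNATIVE_HST_SIMD_REG_MASK
                                                          & ~IEMNATIVE_SIMD_REG_FIXED_MASK
                                                          & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK,
                                                          false /*fPreferVolatile*/);
#endif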
5510
5511/**
5512 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5513 *
5514 * @param pReNative The native recompile state.
5515 * @param idxHstSimdReg The host SIMD register to update the state for.
5516 * @param enmLoadSz The load size to set.
5517 */
5518DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5519 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5520{
5521 /* Everything valid already? -> nothing to do. */
5522 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5523 return;
5524
5525 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5526 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5527 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5528 {
5529 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5530 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5531 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5532 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5533 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5534 }
5535}
5536
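/*
 * Illustrative sketch (not compiled, not from the original source): the
 * load-size merging performed above.  Starting from
 * kIemNativeGstSimdRegLdStSz_Invalid, marking the low 128 bits and then the
 * high 128 bits as loaded leaves the register flagged as fully (256-bit)
 * loaded.  idxHstSimdReg is assumed to be a valid host SIMD register index.
 */
#if 0
    iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_Low128);  /* Invalid -> Low128 */
    iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_High128); /* Low128 + High128 -> 256 */
#endif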
5537
5538static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5539 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5540{
5541    /* Easy case first: either the destination loads the same range as the source has already loaded, or the source has loaded everything. */
5542 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5543 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5544 {
5545# ifdef RT_ARCH_ARM64
5546 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5547 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5548# endif
5549
5550 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5551 {
5552 switch (enmLoadSzDst)
5553 {
5554 case kIemNativeGstSimdRegLdStSz_256:
5555 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5556 break;
5557 case kIemNativeGstSimdRegLdStSz_Low128:
5558 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5559 break;
5560 case kIemNativeGstSimdRegLdStSz_High128:
5561 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5562 break;
5563 default:
5564 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5565 }
5566
5567 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5568 }
5569 }
5570 else
5571 {
5572        /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5573 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5574 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5575 }
5576
5577 return off;
5578}
5579
5580
5581/**
5582 * Allocates a temporary host SIMD register for keeping a guest
5583 * SIMD register value.
5584 *
5585 * Since we may already have a register holding the guest register value,
5586 * code will be emitted to do the loading if that's not the case. Code may also
5587 * be emitted if we have to free up a register to satisfy the request.
5588 *
5589 * @returns The host register number; throws VBox status code on failure, so no
5590 * need to check the return value.
5591 * @param pReNative The native recompile state.
5592 * @param poff Pointer to the variable with the code buffer
5593 * position. This will be updated if we need to move a
5594 * variable from register to stack in order to satisfy
5595 * the request.
5596 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5597 * @param enmIntendedUse How the caller will be using the host register.
5598 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5599 * register is okay (default). The ASSUMPTION here is
5600 * that the caller has already flushed all volatile
5601 * registers, so this is only applied if we allocate a
5602 * new register.
5603 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5604 */
5605DECL_HIDDEN_THROW(uint8_t)
5606iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5607 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5608 bool fNoVolatileRegs /*= false*/)
5609{
5610 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5611#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5612 AssertMsg( pReNative->idxCurCall == 0
5613 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5614 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5615 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5616 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5617 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5618 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5619#endif
5620#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5621 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5622#endif
5623 uint32_t const fRegMask = !fNoVolatileRegs
5624 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5625 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5626
5627 /*
5628 * First check if the guest register value is already in a host register.
5629 */
5630 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5631 {
5632 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5633 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5634 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5635 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5636
5637 /* It's not supposed to be allocated... */
5638 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5639 {
5640 /*
5641             * If the register will trash the guest shadow copy, try to find a
5642 * completely unused register we can use instead. If that fails,
5643 * we need to disassociate the host reg from the guest reg.
5644 */
5645 /** @todo would be nice to know if preserving the register is in any way helpful. */
5646            /* If the purpose is calculations, try to duplicate the register value as
5647 we'll be clobbering the shadow. */
5648 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5649 && ( ~pReNative->Core.bmHstSimdRegs
5650 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5651 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5652 {
5653 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5654
5655 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5656
5657 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5658 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5659 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5660 idxSimdReg = idxRegNew;
5661 }
5662 /* If the current register matches the restrictions, go ahead and allocate
5663 it for the caller. */
5664 else if (fRegMask & RT_BIT_32(idxSimdReg))
5665 {
5666 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5667 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5668 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5669 {
5670 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5671 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5672 else
5673 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5674 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5675 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5676 }
5677 else
5678 {
5679 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5680 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5681 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5682 }
5683 }
5684 /* Otherwise, allocate a register that satisfies the caller and transfer
5685 the shadowing if compatible with the intended use. (This basically
5686               means the caller wants a non-volatile register (RSP push/pop scenario).) */
5687 else
5688 {
5689 Assert(fNoVolatileRegs);
5690 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5691 !fNoVolatileRegs
5692 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5693 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5694 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5695 {
5696 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5697                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5698 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5699 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5700 }
5701 else
5702 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5703 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5704 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5705 idxSimdReg = idxRegNew;
5706 }
5707 }
5708 else
5709 {
5710 /*
5711 * Oops. Shadowed guest register already allocated!
5712 *
5713 * Allocate a new register, copy the value and, if updating, the
5714 * guest shadow copy assignment to the new register.
5715 */
5716 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5717 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5718 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5719 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5720
5721 /** @todo share register for readonly access. */
5722 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5723 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5724
5725 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5726 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5727 else
5728 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5729
5730 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5731 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5732 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5733 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5734 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5735 else
5736 {
5737 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5738 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5739 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5740 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5741 }
5742 idxSimdReg = idxRegNew;
5743 }
5744 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5745
5746#ifdef VBOX_STRICT
5747 /* Strict builds: Check that the value is correct. */
5748 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5749 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5750#endif
5751
5752 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5753 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5754 {
5755# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5756 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5757 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5758# endif
5759
5760 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5761 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5762 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5763 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5764 else
5765 {
5766 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5767 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5768 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5769 }
5770 }
5771
5772 return idxSimdReg;
5773 }
5774
5775 /*
5776     * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5777 */
5778 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5779
5780 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5781 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5782 else
5783 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5784
5785 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5786 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5787
5788 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5789 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5790 {
5791# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5792 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5793 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5794# endif
5795
5796 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5797 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5798 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5799 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5800 else
5801 {
5802 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5803 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5804 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5805 }
5806 }
5807
5808    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5809 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5810
5811 return idxRegNew;
5812}
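
/*
 * Illustrative sketch only (not compiled): typical shape of a caller of
 * iemNativeSimdRegAllocTmpForGuestSimdReg.  The guest SIMD register value is a
 * hypothetical placeholder, and releasing the host register again is left to the
 * regular free routine used elsewhere in this file.
 */
#if 0
static uint32_t iemNativeExampleUseGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    IEMNATIVEGSTSIMDREG const enmGstSimdReg = (IEMNATIVEGSTSIMDREG)0; /* hypothetical: first guest SIMD register */

    /* Get a host register shadowing the low 128 bits of the guest register,
       emitting a load from CPUMCTX only if it isn't shadowed already: */
    uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, enmGstSimdReg,
                                                                       kIemNativeGstSimdRegLdStSz_Low128,
                                                                       kIemNativeGstRegUse_ForUpdate,
                                                                       false /*fNoVolatileRegs*/);
    /* ... emit instructions operating on idxSimdReg here; the dirty tracking set up
       by the allocator ensures the result is written back to CPUMCTX later. */
    RT_NOREF(idxSimdReg);
    return off;
}
#endif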
5813
5814
5815/**
5816 * Flushes guest SIMD register shadow copies held by a set of host registers.
5817 *
5818 * This is used when calling an external helper, to ensure that we don't carry on
5819 * with any guest shadows in volatile registers, as these will get corrupted by the callee.
5820 *
5821 * @param pReNative The native recompile state.
5822 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5823 */
5824DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5825{
5826 /*
5827 * Reduce the mask by what's currently shadowed.
5828 */
5829 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5830 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5831 if (fHstSimdRegs)
5832 {
5833 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5834 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5835 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5836 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5837 if (bmHstSimdRegsWithGstShadowNew)
5838 {
5839 /*
5840 * Partial (likely).
5841 */
5842 uint64_t fGstShadows = 0;
5843 do
5844 {
5845 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5846 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5847 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5848 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5849 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5850 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5851
5852 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5853 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5854 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5855 } while (fHstSimdRegs != 0);
5856 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5857 }
5858 else
5859 {
5860 /*
5861 * Clear all.
5862 */
5863 do
5864 {
5865 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5866 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5867 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5868 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5869 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5870 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5871
5872 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5873 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5874 } while (fHstSimdRegs != 0);
5875 pReNative->Core.bmGstSimdRegShadows = 0;
5876 }
5877 }
5878}
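
/*
 * Illustrative sketch only (not compiled): the flush above is typically used with
 * the call-volatile mask right before invoking an external helper, so that no
 * guest SIMD shadowing survives in registers the callee may clobber.
 */
#if 0
static void iemNativeExampleFlushSimdShadowsBeforeHelperCall(PIEMRECOMPILERSTATE pReNative)
{
    /* Drop shadow bookkeeping for all call-volatile host SIMD registers: */
    iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
}
#endif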
5879#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5880
5881
5882
5883/*********************************************************************************************************************************
5884* Code emitters for flushing pending guest register writes and sanity checks *
5885*********************************************************************************************************************************/
5886
5887#ifdef VBOX_STRICT
5888/**
5889 * Does internal register allocator sanity checks.
5890 */
5891DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5892{
5893 /*
5894 * Iterate host registers building a guest shadowing set.
5895 */
5896 uint64_t bmGstRegShadows = 0;
5897 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5898 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5899 while (bmHstRegsWithGstShadow)
5900 {
5901 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5902 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5903 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5904
5905 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5906 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5907 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5908 bmGstRegShadows |= fThisGstRegShadows;
5909 while (fThisGstRegShadows)
5910 {
5911 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5912 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5913 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5914 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5915 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5916 }
5917 }
5918 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5919 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5920 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5921
5922 /*
5923 * Now the other way around, checking the guest to host index array.
5924 */
5925 bmHstRegsWithGstShadow = 0;
5926 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5927 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5928 while (bmGstRegShadows)
5929 {
5930 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5931 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5932 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5933
5934 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5935 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5936 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5937 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5938 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5939 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5940 }
5941 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5942 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5943 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5944}
5945#endif /* VBOX_STRICT */
5946
5947
5948/**
5949 * Flushes any delayed guest register writes.
5950 *
5951 * This must be called prior to calling CImpl functions and any helpers that use
5952 * the guest state (like raising exceptions) and such.
5953 *
5954 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5955 * the caller if it wishes to do so.
5956 */
5957DECL_HIDDEN_THROW(uint32_t)
5958iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5959{
5960#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5961 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5962 off = iemNativeEmitPcWriteback(pReNative, off);
5963#else
5964 RT_NOREF(pReNative, fGstShwExcept);
5965#endif
5966
5967#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5968 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5969#endif
5970
5971#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5972 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5973#endif
5974
5975 return off;
5976}
5977
5978#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5979
5980# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5981
5982/**
5983 * Checks if the value in @a idxPcReg matches IEMCPU::uPcUpdatingDebug.
5984 */
5985DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheckWithReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxPcReg)
5986{
5987 Assert(idxPcReg != IEMNATIVE_REG_FIXED_TMP0);
5988 Assert(pReNative->Core.fDebugPcInitialized);
5989
5990 /* cmp [pVCpu->iem.s.uPcUpdatingDebug], pcreg */
5991# ifdef RT_ARCH_AMD64
5992 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5993 pCodeBuf[off++] = X86_OP_REX_W | (idxPcReg >= 8 ? X86_OP_REX_R : 0);
5994 pCodeBuf[off++] = 0x3b;
5995 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxPcReg & 7, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5996# else
5997 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5998 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5999 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, idxPcReg);
6000# endif
6001
6002 uint32_t offFixup = off;
6003 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, kIemNativeInstrCond_e);
6004 off = iemNativeEmitBrkEx(pCodeBuf, off, UINT32_C(0x2200));
6005 iemNativeFixupFixedJump(pReNative, offFixup, off);
6006
6007 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6008 return off;
6009}
6010
6011
6012/**
6013 * Checks that the current RIP+offPc matches IEMCPU::uPcUpdatingDebug.
6014 */
6015DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6016{
6017 if (pReNative->Core.fDebugPcInitialized)
6018 {
6019 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc);
6020 if (pReNative->Core.offPc)
6021 {
6022 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6023 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 8);
6024 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxTmpReg, idxPcReg, pReNative->Core.offPc);
6025 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6026 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxTmpReg);
6027 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6028 }
6029 else
6030 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
6031 iemNativeRegFreeTmp(pReNative, idxPcReg);
6032 }
6033 return off;
6034}
6035
6036# endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG */
6037
6038/**
6039 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
6040 */
6041DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6042{
6043 Assert(pReNative->Core.offPc);
6044# if !defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && !defined(VBOX_WITH_STATISTICS)
6045 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x\n", pReNative->Core.offPc, off));
6046# else
6047 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
6048 uint8_t idxCurCall = pReNative->idxCurCall;
6049 uint8_t idxInstr = pReNative->pTbOrg->Thrd.paCalls[idxCurCall].idxInstr; /* unreliable*/
6050 while (idxInstr == 0 && idxInstr + 1 < idxOldInstrPlusOne && idxCurCall > 0)
6051 idxInstr = pReNative->pTbOrg->Thrd.paCalls[--idxCurCall].idxInstr;
6052 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
6053 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
6054 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u\n",
6055 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped));
6056
6057 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
6058
6059# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6060 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6061 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
6062# endif
6063# endif
6064
6065# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6066 /* Allocate a temporary PC register. */
6067 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6068
6069 /* Perform the addition and store the result. */
6070 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6071 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6072# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6073 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
6074# endif
6075
6076 /* Free but don't flush the PC register. */
6077 iemNativeRegFreeTmp(pReNative, idxPcReg);
6078# else
6079 /* Compare the shadow with the context value, they should match. */
6080 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6081 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6082# endif
6083
6084 pReNative->Core.offPc = 0;
6085
6086 return off;
6087}
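
/*
 * Illustrative sketch only (not compiled): the delayed PC updating scheme lets the
 * recompiler accumulate instruction lengths in pReNative->Core.offPc instead of
 * storing RIP after every instruction; a caller flushes the pending offset with a
 * single writeback like this (cf. the iemNativeEmitPcWriteback call further up).
 */
# if 0
static uint32_t iemNativeExamplePcFlush(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    if (pReNative->Core.offPc) /* only flush when something is actually pending */
        off = iemNativeEmitPcWritebackSlow(pReNative, off);
    return off;
}
# endif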
6088
6089#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6090
6091
6092/*********************************************************************************************************************************
6093* Code Emitters (larger snippets) *
6094*********************************************************************************************************************************/
6095
6096/**
6097 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6098 * extending to 64-bit width.
6099 *
6100 * @returns New code buffer offset on success, UINT32_MAX on failure.
6101 * @param pReNative The native recompile state.
6102 * @param off The current code buffer position.
6103 * @param idxHstReg The host register to load the guest register value into.
6104 * @param enmGstReg The guest register to load.
6105 *
6106 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6107 * that is something the caller needs to do if applicable.
6108 */
6109DECL_HIDDEN_THROW(uint32_t)
6110iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6111{
6112 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6113 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6114
6115 switch (g_aGstShadowInfo[enmGstReg].cb)
6116 {
6117 case sizeof(uint64_t):
6118 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6119 case sizeof(uint32_t):
6120 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6121 case sizeof(uint16_t):
6122 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6123#if 0 /* not present in the table. */
6124 case sizeof(uint8_t):
6125 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6126#endif
6127 default:
6128 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6129 }
6130}
6131
6132
6133/**
6134 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6135 * extending to 64-bit width, extended version.
6136 *
6137 * @returns New code buffer offset on success, UINT32_MAX on failure.
6138 * @param pCodeBuf The code buffer.
6139 * @param off The current code buffer position.
6140 * @param idxHstReg The host register to load the guest register value into.
6141 * @param enmGstReg The guest register to load.
6142 *
6143 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6144 * that is something the caller needs to do if applicable.
6145 */
6146DECL_HIDDEN_THROW(uint32_t)
6147iemNativeEmitLoadGprWithGstShadowRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6148{
6149 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6150 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6151
6152 switch (g_aGstShadowInfo[enmGstReg].cb)
6153 {
6154 case sizeof(uint64_t):
6155 return iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6156 case sizeof(uint32_t):
6157 return iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6158 case sizeof(uint16_t):
6159 return iemNativeEmitLoadGprFromVCpuU16Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6160#if 0 /* not present in the table. */
6161 case sizeof(uint8_t):
6162 return iemNativeEmitLoadGprFromVCpuU8Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6163#endif
6164 default:
6165#ifdef IEM_WITH_THROW_CATCH
6166 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6167#else
6168 AssertReleaseFailedReturn(off);
6169#endif
6170 }
6171}
6172
6173
6174#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6175/**
6176 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6177 *
6178 * @returns New code buffer offset on success, UINT32_MAX on failure.
6179 * @param pReNative The recompiler state.
6180 * @param off The current code buffer position.
6181 * @param idxHstSimdReg The host register to load the guest register value into.
6182 * @param enmGstSimdReg The guest register to load.
6183 * @param enmLoadSz The load size of the register.
6184 *
6185 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6186 * that is something the caller needs to do if applicable.
6187 */
6188DECL_HIDDEN_THROW(uint32_t)
6189iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6190 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6191{
6192 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6193
6194 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6195 switch (enmLoadSz)
6196 {
6197 case kIemNativeGstSimdRegLdStSz_256:
6198 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6199 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6200 case kIemNativeGstSimdRegLdStSz_Low128:
6201 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6202 case kIemNativeGstSimdRegLdStSz_High128:
6203 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6204 default:
6205 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6206 }
6207}
6208#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6209
6210#ifdef VBOX_STRICT
6211
6212/**
6213 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6214 *
6215 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6216 * Trashes EFLAGS on AMD64.
6217 */
6218DECL_FORCE_INLINE(uint32_t)
6219iemNativeEmitTop32BitsClearCheckEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxReg)
6220{
6221# ifdef RT_ARCH_AMD64
6222 /* rol reg64, 32 */
6223 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6224 pCodeBuf[off++] = 0xc1;
6225 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6226 pCodeBuf[off++] = 32;
6227
6228 /* test reg32, ffffffffh */
6229 if (idxReg >= 8)
6230 pCodeBuf[off++] = X86_OP_REX_B;
6231 pCodeBuf[off++] = 0xf7;
6232 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6233 pCodeBuf[off++] = 0xff;
6234 pCodeBuf[off++] = 0xff;
6235 pCodeBuf[off++] = 0xff;
6236 pCodeBuf[off++] = 0xff;
6237
6238 /* je/jz +1 */
6239 pCodeBuf[off++] = 0x74;
6240 pCodeBuf[off++] = 0x01;
6241
6242 /* int3 */
6243 pCodeBuf[off++] = 0xcc;
6244
6245 /* rol reg64, 32 */
6246 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6247 pCodeBuf[off++] = 0xc1;
6248 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6249 pCodeBuf[off++] = 32;
6250
6251# elif defined(RT_ARCH_ARM64)
6252 /* lsr tmp0, reg64, #32 */
6253 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6254 /* cbz tmp0, +1 */
6255 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6256 /* brk #0x1100 */
6257 pCodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6258
6259# else
6260# error "Port me!"
6261# endif
6262 return off;
6263}
6264
6265
6266/**
6267 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6268 *
6269 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6270 * Trashes EFLAGS on AMD64.
6271 */
6272DECL_HIDDEN_THROW(uint32_t)
6273iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6274{
6275# ifdef RT_ARCH_AMD64
6276 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6277# elif defined(RT_ARCH_ARM64)
6278 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6279# else
6280# error "Port me!"
6281# endif
6282 off = iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6283 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6284 return off;
6285}
6286
6287
6288/**
6289 * Emitting code that checks that the content of register @a idxReg is the same
6290 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6291 * instruction if that's not the case.
6292 *
6293 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6294 * Trashes EFLAGS on AMD64.
6295 */
6296DECL_HIDDEN_THROW(uint32_t) iemNativeEmitGuestRegValueCheckEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf,
6297 uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6298{
6299#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6300    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6301 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
6302 return off;
6303#endif
6304
6305# ifdef RT_ARCH_AMD64
6306 /* cmp reg, [mem] */
6307 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6308 {
6309 if (idxReg >= 8)
6310 pCodeBuf[off++] = X86_OP_REX_R;
6311 pCodeBuf[off++] = 0x38;
6312 }
6313 else
6314 {
6315 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6316 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6317 else
6318 {
6319 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6320 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6321 else
6322 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6323 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6324 if (idxReg >= 8)
6325 pCodeBuf[off++] = X86_OP_REX_R;
6326 }
6327 pCodeBuf[off++] = 0x39;
6328 }
6329 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6330
6331 /* je/jz +1 */
6332 pCodeBuf[off++] = 0x74;
6333 pCodeBuf[off++] = 0x01;
6334
6335 /* int3 */
6336 pCodeBuf[off++] = 0xcc;
6337
6338 /* For values smaller than the register size, we must check that the rest
6339 of the register is all zeros. */
6340 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6341 {
6342 /* test reg64, imm32 */
6343 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6344 pCodeBuf[off++] = 0xf7;
6345 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6346 pCodeBuf[off++] = 0;
6347 pCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6348 pCodeBuf[off++] = 0xff;
6349 pCodeBuf[off++] = 0xff;
6350
6351 /* je/jz +1 */
6352 pCodeBuf[off++] = 0x74;
6353 pCodeBuf[off++] = 0x01;
6354
6355 /* int3 */
6356 pCodeBuf[off++] = 0xcc;
6357 }
6358 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6359        off = iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6360 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6361
6362# elif defined(RT_ARCH_ARM64)
6363 /* mov TMP0, [gstreg] */
6364 off = iemNativeEmitLoadGprWithGstShadowRegEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6365
6366 /* sub tmp0, tmp0, idxReg */
6367 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6368 /* cbz tmp0, +2 */
6369 pCodeBuf[off++] = Armv8A64MkInstrCbz(2, IEMNATIVE_REG_FIXED_TMP0);
6370 /* brk #0x1000+enmGstReg */
6371 pCodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6372 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6373
6374# else
6375# error "Port me!"
6376# endif
6377 return off;
6378}
6379
6380
6381/**
6382 * Emitting code that checks that the content of register @a idxReg is the same
6383 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6384 * instruction if that's not the case.
6385 *
6386 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6387 * Trashes EFLAGS on AMD64.
6388 */
6389DECL_HIDDEN_THROW(uint32_t)
6390iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6391{
6392#ifdef RT_ARCH_AMD64
6393 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6394#elif defined(RT_ARCH_ARM64)
6395 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6396# else
6397# error "Port me!"
6398# endif
6399 return iemNativeEmitGuestRegValueCheckEx(pReNative, pCodeBuf, off, idxReg, enmGstReg);
6400}
6401
6402# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6403# ifdef RT_ARCH_AMD64
6404/**
6405 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
6406 * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
6407DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6408{
6409 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6410 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6411 if (idxSimdReg >= 8)
6412 pbCodeBuf[off++] = X86_OP_REX_R;
6413 pbCodeBuf[off++] = 0x0f;
6414 pbCodeBuf[off++] = 0x38;
6415 pbCodeBuf[off++] = 0x29;
6416 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6417
6418 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6419 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6420 pbCodeBuf[off++] = X86_OP_REX_W
6421 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6422 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6423 pbCodeBuf[off++] = 0x0f;
6424 pbCodeBuf[off++] = 0x3a;
6425 pbCodeBuf[off++] = 0x16;
6426 pbCodeBuf[off++] = 0xeb;
6427 pbCodeBuf[off++] = 0x00;
6428
6429 /* cmp tmp0, 0xffffffffffffffff. */
6430 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6431 pbCodeBuf[off++] = 0x83;
6432 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6433 pbCodeBuf[off++] = 0xff;
6434
6435 /* je/jz +1 */
6436 pbCodeBuf[off++] = 0x74;
6437 pbCodeBuf[off++] = 0x01;
6438
6439 /* int3 */
6440 pbCodeBuf[off++] = 0xcc;
6441
6442 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6443 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6444 pbCodeBuf[off++] = X86_OP_REX_W
6445 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6446 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6447 pbCodeBuf[off++] = 0x0f;
6448 pbCodeBuf[off++] = 0x3a;
6449 pbCodeBuf[off++] = 0x16;
6450 pbCodeBuf[off++] = 0xeb;
6451 pbCodeBuf[off++] = 0x01;
6452
6453 /* cmp tmp0, 0xffffffffffffffff. */
6454 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6455 pbCodeBuf[off++] = 0x83;
6456 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6457 pbCodeBuf[off++] = 0xff;
6458
6459 /* je/jz +1 */
6460 pbCodeBuf[off++] = 0x74;
6461 pbCodeBuf[off++] = 0x01;
6462
6463 /* int3 */
6464 pbCodeBuf[off++] = 0xcc;
6465
6466 return off;
6467}
6468# endif
6469
6470
6471/**
6472 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6473 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6474 * instruction if that's not the case.
6475 *
6476 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6477 * Trashes EFLAGS on AMD64.
6478 */
6479DECL_HIDDEN_THROW(uint32_t)
6480iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6481 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6482{
6483    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6484 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6485 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6486 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6487 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6488 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6489 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6490 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6491 return off;
6492
6493# ifdef RT_ARCH_AMD64
6494 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6495 {
6496 /* movdqa vectmp0, idxSimdReg */
6497 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6498
6499 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6500
6501 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6502 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6503 }
6504
6505 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6506 {
6507        /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6508 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6509
6510 /* vextracti128 vectmp0, idxSimdReg, 1 */
6511 pbCodeBuf[off++] = X86_OP_VEX3;
6512 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6513 | X86_OP_VEX3_BYTE1_X
6514 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6515 | 0x03; /* Opcode map */
6516 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6517 pbCodeBuf[off++] = 0x39;
6518 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6519 pbCodeBuf[off++] = 0x01;
6520
6521 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6522 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6523 }
6524# elif defined(RT_ARCH_ARM64)
6525 /* mov vectmp0, [gstreg] */
6526 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6527
6528 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6529 {
6530 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6531 /* eor vectmp0, vectmp0, idxSimdReg */
6532 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6533 /* uaddlv vectmp0, vectmp0.16B */
6534 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6535 /* umov tmp0, vectmp0.H[0] */
6536 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6537 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6538 /* cbz tmp0, +1 */
6539 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6540 /* brk #0x1000+enmGstReg */
6541 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6542 }
6543
6544 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6545 {
6546 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6547 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6548 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6549 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6550 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6551 /* umov tmp0, (vectmp0 + 1).H[0] */
6552 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6553 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6554 /* cbz tmp0, +1 */
6555 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6556 /* brk #0x1000+enmGstReg */
6557 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6558 }
6559
6560# else
6561# error "Port me!"
6562# endif
6563
6564 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6565 return off;
6566}
6567# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6568
6569
6570/**
6571 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6572 * important bits.
6573 *
6574 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6575 * Trashes EFLAGS on AMD64.
6576 */
6577DECL_HIDDEN_THROW(uint32_t)
6578iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6579{
6580 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6581 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6582 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6583 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6584
6585#ifdef RT_ARCH_AMD64
6586 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6587
6588 /* je/jz +1 */
6589 pbCodeBuf[off++] = 0x74;
6590 pbCodeBuf[off++] = 0x01;
6591
6592 /* int3 */
6593 pbCodeBuf[off++] = 0xcc;
6594
6595# elif defined(RT_ARCH_ARM64)
6596 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6597
6598 /* b.eq +1 */
6599 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6600 /* brk #0x2000 */
6601 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6602
6603# else
6604# error "Port me!"
6605# endif
6606 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6607
6608 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6609 return off;
6610}
6611
6612#endif /* VBOX_STRICT */
6613
6614
6615#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6616/**
6617 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6618 */
6619DECL_HIDDEN_THROW(uint32_t)
6620iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6621{
6622 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6623
6624 fEflNeeded &= X86_EFL_STATUS_BITS;
6625 if (fEflNeeded)
6626 {
6627# ifdef RT_ARCH_AMD64
6628 /* test dword [pVCpu + offVCpu], imm32 */
6629 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
6630 if (fEflNeeded <= 0xff)
6631 {
6632 pCodeBuf[off++] = 0xf6;
6633 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6634 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6635 }
6636 else
6637 {
6638 pCodeBuf[off++] = 0xf7;
6639 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6640 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6641 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6642 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6643 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6644 }
6645
6646 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 3, kIemNativeInstrCond_e);
6647 pCodeBuf[off++] = 0xcc;
6648
6649 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6650
6651# else
6652 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6653 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6654 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6655# ifdef RT_ARCH_ARM64
6656 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6657 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6658# else
6659# error "Port me!"
6660# endif
6661 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6662# endif
6663 }
6664 return off;
6665}
6666#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6667
6668
6669/**
6670 * Emits code for checking the return code of a call and rcPassUp, returning
6671 * from the code if either is non-zero.
6672 */
6673DECL_HIDDEN_THROW(uint32_t)
6674iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6675{
6676#ifdef RT_ARCH_AMD64
6677 /*
6678 * AMD64: eax = call status code.
6679 */
6680
6681 /* edx = rcPassUp */
6682 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6683# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6684 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6685# endif
6686
6687 /* edx = eax | rcPassUp */
6688 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6689 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6690 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6691 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6692
6693 /* Jump to non-zero status return path. */
6694 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, off);
6695
6696 /* done. */
6697
6698#elif RT_ARCH_ARM64
6699 /*
6700 * ARM64: w0 = call status code.
6701 */
6702 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+3+3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
6703
6704# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6705 AssertCompile(ARMV8_A64_REG_X2 == IEMNATIVE_CALL_ARG2_GREG);
6706 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, ARMV8_A64_REG_X2, idxInstr);
6707# endif
6708 off = iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6709
6710 pCodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6711
6712 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, pCodeBuf, off,
6713 ARMV8_A64_REG_X4, true /*f64Bit*/);
6714
6715#else
6716# error "port me"
6717#endif
6718 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6719 RT_NOREF_PV(idxInstr);
6720 return off;
6721}
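
/*
 * Illustrative sketch only (not compiled): the native code emitted by the function
 * above is logically equivalent to the following C check, except that a non-zero
 * result branches to the kIemNativeLabelType_NonZeroRetOrPassUp exit path rather
 * than returning.
 */
#if 0
static bool iemNativeExampleRetAndPassUpCheck(PVMCPUCC pVCpu, int32_t rcCall)
{
    /* OR the call status into the sticky pass-up status and bail on any non-zero value: */
    return ((uint32_t)rcCall | (uint32_t)pVCpu->iem.s.rcPassUp) != 0;
}
#endif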
6722
6723
6724/**
6725 * Emits a call to a CImpl function or something similar.
6726 */
6727DECL_HIDDEN_THROW(uint32_t)
6728iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6729 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6730{
6731 /* Writeback everything. */
6732 off = iemNativeRegFlushPendingWrites(pReNative, off);
6733
6734 /*
6735     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6736 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6737 */
6738 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6739 fGstShwFlush
6740 | RT_BIT_64(kIemNativeGstReg_Pc)
6741 | RT_BIT_64(kIemNativeGstReg_EFlags));
6742 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6743
6744 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6745
6746 /*
6747 * Load the parameters.
6748 */
6749#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6750    /* Special code for the hidden VBOXSTRICTRC pointer. */
6751 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6752 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6753 if (cAddParams > 0)
6754 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6755 if (cAddParams > 1)
6756 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6757 if (cAddParams > 2)
6758 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6759 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6760
6761#else
6762 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6763 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6764 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6765 if (cAddParams > 0)
6766 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6767 if (cAddParams > 1)
6768 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6769 if (cAddParams > 2)
6770# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6771 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6772# else
6773 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6774# endif
6775#endif
6776
6777 /*
6778 * Make the call.
6779 */
6780 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6781
6782#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6783 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6784#endif
6785
6786#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6787 pReNative->Core.fDebugPcInitialized = false;
6788 Log4(("fDebugPcInitialized=false cimpl off=%#x (v2)\n", off));
6789#endif
6790
6791 /*
6792 * Check the status code.
6793 */
6794 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6795}
6796
6797
6798/**
6799 * Emits a call to a threaded worker function.
6800 */
6801DECL_HIDDEN_THROW(uint32_t)
6802iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6803{
6804 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
6805 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6806
6807 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6808 off = iemNativeRegFlushPendingWrites(pReNative, off);
6809
6810 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6811 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6812
6813#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6814 /* The threaded function may throw / long jmp, so set current instruction
6815 number if we're counting. */
6816 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6817#endif
6818
6819 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6820
6821#ifdef RT_ARCH_AMD64
6822 /* Load the parameters and emit the call. */
6823# ifdef RT_OS_WINDOWS
6824# ifndef VBOXSTRICTRC_STRICT_ENABLED
6825 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6826 if (cParams > 0)
6827 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6828 if (cParams > 1)
6829 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6830 if (cParams > 2)
6831 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6832# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6833 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6834 if (cParams > 0)
6835 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6836 if (cParams > 1)
6837 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6838 if (cParams > 2)
6839 {
6840 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6841 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6842 }
6843 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6844# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6845# else
6846 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6847 if (cParams > 0)
6848 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6849 if (cParams > 1)
6850 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6851 if (cParams > 2)
6852 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6853# endif
6854
6855 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6856
6857# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6858 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6859# endif
6860
6861#elif RT_ARCH_ARM64
6862 /*
6863 * ARM64:
6864 */
6865 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6866 if (cParams > 0)
6867 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6868 if (cParams > 1)
6869 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6870 if (cParams > 2)
6871 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6872
6873 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6874
6875#else
6876# error "port me"
6877#endif
6878
6879#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6880 pReNative->Core.fDebugPcInitialized = false;
6881 Log4(("fDebugPcInitialized=false todo off=%#x (v2)\n", off));
6882#endif
6883
6884 /*
6885 * Check the status code.
6886 */
6887 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6888
6889 return off;
6890}
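/*
 * Illustrative sketch (not part of the emitted code): the per-ABI register
 * loads above conceptually set up the following C call, assuming the threaded
 * functions take the VCpu pointer plus up to three uint64_t parameters and
 * return a strict status code.
 */
#if 0 /* sketch only, never compiled */
VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
                                                                                  pCallEntry->auParams[0],
                                                                                  pCallEntry->auParams[1],
                                                                                  pCallEntry->auParams[2]);
/* ...followed by the status check emitted via iemNativeEmitCheckCallRetAndPassUp(). */
#endif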
6891
6892
6893/**
6894 * The default liveness function, matching iemNativeEmitThreadedCall.
6895 */
6896IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_ThreadedCall)
6897{
6898 IEM_LIVENESS_RAW_INIT_WITH_CALL(pOutgoing, pIncoming);
6899 RT_NOREF(pCallEntry);
6900}
6901
6902#ifdef VBOX_WITH_STATISTICS
6903
6904/**
6905 * Emits code to update the thread call statistics.
6906 */
6907DECL_INLINE_THROW(uint32_t)
6908iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6909{
6910 /*
6911 * Update threaded function stats.
6912 */
6913 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6914 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6915# if defined(RT_ARCH_ARM64)
6916 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6917 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6918 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6919 iemNativeRegFreeTmp(pReNative, idxTmp1);
6920 iemNativeRegFreeTmp(pReNative, idxTmp2);
6921# else
6922 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6923# endif
6924 return off;
6925}
6926
6927
6928/**
6929 * Emits code to update the TB exit reason statistics.
6930 */
6931DECL_INLINE_THROW(uint32_t)
6932iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6933{
6934 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6935 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6936 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6937 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6938 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6939
6940 return off;
6941}
6942
6943#endif /* VBOX_WITH_STATISTICS */
6944
6945/**
6946 * Worker for iemNativeEmitViaLookupDoOne and iemNativeRecompileAttachExecMemChunkCtx.
6947 */
6948static uint32_t
6949iemNativeEmitCoreViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak, uintptr_t pfnHelper)
6950{
6951 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6952 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6953
6954 /* Jump to ReturnBreak if the return register is NULL. */
6955 off = iemNativeEmitTestIfGprIsZeroAndJmpToFixed(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6956 true /*f64Bit*/, offReturnBreak);
6957
6958 /* Okay, continue executing the next TB. */
6959 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6960 return off;
6961}
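/*
 * Rough C equivalent of the sequence emitted above (sketch only; FNTBLOOKUP
 * is a hypothetical prototype for the lookup helper, and the final transfer
 * is a direct branch rather than a call):
 */
#if 0 /* sketch only, never compiled */
typedef uintptr_t FNTBLOOKUP(PVMCPUCC pVCpu);
uintptr_t const pfnNextTb = ((FNTBLOOKUP *)pfnHelper)(pVCpu);
if (!pfnNextTb)
    goto ReturnBreak;           /* no next TB: take the ReturnBreak tail code */
/* otherwise the generated code jumps straight to pfnNextTb */
#endif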
6962
6963
6964/**
6965 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6966 */
6967static uint32_t iemNativeEmitCoreReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6968{
6969 /* set the return status */
6970 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6971}
6972
6973
6974/**
6975 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6976 */
6977static uint32_t iemNativeEmitCoreReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6978{
6979 /* set the return status */
6980 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6981}
6982
6983
6984/**
6985 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6986 */
6987static uint32_t iemNativeEmitCoreReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6988{
6989 /* set the return status */
6990 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6991}
6992
6993
6994/**
6995 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6996 */
6997static uint32_t iemNativeEmitCoreRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6998{
6999 /*
7000 * Generate the rc + rcPassUp fiddling code.
7001 */
7002 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
7003#ifdef RT_ARCH_AMD64
7004# ifdef RT_OS_WINDOWS
7005# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7006 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
7007# endif
7008 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7009 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
7010# else
7011 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7012 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7013# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7014 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7015# endif
7016# endif
7017# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7018 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7019# endif
7020
7021#else
7022 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7023 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7024 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7025#endif
7026
7027 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7028 return off;
7029}
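/*
 * The register shuffling above merely marshals the arguments for the call
 * already named in the comment at the top of this function; conceptually
 * (sketch only):
 */
#if 0 /* sketch only, never compiled */
rc = iemNativeHlpExecStatusCodeFiddling(pVCpu, rc, idxInstr);
#endif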
7030
7031
7032/**
7033 * Emits a standard epilog.
7034 */
7035static uint32_t iemNativeEmitCoreEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7036{
7037 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
7038
7039 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
7040 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7041
7042 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
7043 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
7044
7045 /*
7046 * Restore registers and return.
7047 */
7048#ifdef RT_ARCH_AMD64
7049 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7050
7051 /* Reposition rsp at the r15 restore point. */
7052 pbCodeBuf[off++] = X86_OP_REX_W;
7053 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7054 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7055 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7056
7057 /* Pop non-volatile registers and return */
7058 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7059 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7060 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7061 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7062 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7063 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7064 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7065 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7066# ifdef RT_OS_WINDOWS
7067 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7068 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7069# endif
7070 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7071 pbCodeBuf[off++] = 0xc9; /* leave */
7072 pbCodeBuf[off++] = 0xc3; /* ret */
7073 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7074
7075#elif RT_ARCH_ARM64
7076 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7077
7078 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
7079 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7080 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7081 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7082 IEMNATIVE_FRAME_VAR_SIZE / 8);
7083 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7084 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7085 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7086 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7087 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7088 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7089 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7090 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7091 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7092 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7093 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7094 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7095
7096 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7097 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7098 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7099 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7100
7101 /* retab / ret */
7102# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7103 if (1)
7104 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7105 else
7106# endif
7107 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7108
7109#else
7110# error "port me"
7111#endif
7112 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7113
7114 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
7115 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
7116
7117 return off;
7118}
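/*
 * For reference, the amd64 sequence emitted above disassembles roughly as
 * follows (a sketch, not authoritative; the rdi/rsi pops are Windows only):
 *
 *      lea     rsp, [rbp + IEMNATIVE_FP_OFF_LAST_PUSH]
 *      pop     r15
 *      pop     r14
 *      pop     r13
 *      pop     r12
 *      pop     rdi                 ; Windows only
 *      pop     rsi                 ; Windows only
 *      pop     rbx
 *      leave
 *      ret
 *      int3                        ; poison
 */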
7119
7120
7121
7122/*********************************************************************************************************************************
7123* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7124*********************************************************************************************************************************/
7125
7126/**
7127 * Internal worker that allocates a variable with kind set to
7128 * kIemNativeVarKind_Invalid and no current stack allocation.
7129 *
7130 * The kind will either be set by the caller or later when the variable is first
7131 * assigned a value.
7132 *
7133 * @returns Unpacked index.
7134 * @internal
7135 */
7136static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7137{
7138 Assert(cbType > 0 && cbType <= 64);
7139 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7140 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7141 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7142 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7143 pReNative->Core.aVars[idxVar].cbVar = cbType;
7144 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7145 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7146 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7147 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7148 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7149 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7150 pReNative->Core.aVars[idxVar].u.uValue = 0;
7151#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7152 pReNative->Core.aVars[idxVar].fSimdReg = false;
7153#endif
7154 return idxVar;
7155}
7156
7157
7158/**
7159 * Internal worker that allocates an argument variable w/o setting enmKind.
7160 *
7161 * @returns Unpacked index.
7162 * @internal
7163 */
7164static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7165{
7166 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7167 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7168 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7169
7170 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7171 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7172 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7173 return idxVar;
7174}
7175
7176
7177/**
7178 * Gets the stack slot for a stack variable, allocating one if necessary.
7179 *
7180 * Calling this function implies that the stack slot will contain a valid
7181 * variable value. The caller deals with any register currently assigned to the
7182 * variable, typically by spilling it into the stack slot.
7183 *
7184 * @returns The stack slot number.
7185 * @param pReNative The recompiler state.
7186 * @param idxVar The variable.
7187 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7188 */
7189DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7190{
7191 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7192 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7193 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7194
7195 /* Already got a slot? */
7196 uint8_t const idxStackSlot = pVar->idxStackSlot;
7197 if (idxStackSlot != UINT8_MAX)
7198 {
7199 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7200 return idxStackSlot;
7201 }
7202
7203 /*
7204 * A single slot is easy to allocate.
7205 * Allocate them from the top end, closest to BP, to reduce the displacement.
7206 */
7207 if (pVar->cbVar <= sizeof(uint64_t))
7208 {
7209 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7210 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7211 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7212 pVar->idxStackSlot = (uint8_t)iSlot;
7213 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7214 return (uint8_t)iSlot;
7215 }
7216
7217 /*
7218 * We need more than one stack slot.
7219 *
7220 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7221 */
7222 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7223 Assert(pVar->cbVar <= 64);
7224 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7225 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7226 uint32_t bmStack = pReNative->Core.bmStack;
7227 while (bmStack != UINT32_MAX)
7228 {
7229 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7230 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7231 iSlot = (iSlot - 1) & ~fBitAlignMask;
7232 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7233 {
7234 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7235 pVar->idxStackSlot = (uint8_t)iSlot;
7236 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7237 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7238 return (uint8_t)iSlot;
7239 }
7240
7241 bmStack |= (fBitAllocMask << iSlot);
7242 }
7243 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7244}
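/*
 * Worked example of the multi-slot math above (sketch): for a 32 byte
 * variable, ASMBitLastSetU32(32) is 6, so fBitAlignMask = RT_BIT_32(6 - 4) - 1
 * = 3 and fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = 0xf.  The loop thus
 * searches for four consecutive free slots starting at an index that is a
 * multiple of four, scanning downwards from the top of the frame.
 */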
7245
7246
7247/**
7248 * Changes the variable to a stack variable.
7249 *
7250 * Currently this is only possible to do the first time the variable is used;
7251 * switching later could be implemented but hasn't been done.
7252 *
7253 * @param pReNative The recompiler state.
7254 * @param idxVar The variable.
7255 * @throws VERR_IEM_VAR_IPE_2
7256 */
7257DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7258{
7259 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7260 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7261 if (pVar->enmKind != kIemNativeVarKind_Stack)
7262 {
7263 /* We could in theory transition from immediate to stack as well, but it
7264 would involve the caller doing work storing the value on the stack. So,
7265 till that's required we only allow transition from invalid. */
7266 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7267 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7268 pVar->enmKind = kIemNativeVarKind_Stack;
7269
7270 /* Note! We don't allocate a stack slot here, that's only done when a
7271 slot is actually needed to hold a variable value. */
7272 }
7273}
7274
7275
7276/**
7277 * Sets the variable to a constant (immediate) value.
7278 *
7279 * This does not require stack storage as we know the value and can always
7280 * reload it, unless of course it's referenced.
7281 *
7282 * @param pReNative The recompiler state.
7283 * @param idxVar The variable.
7284 * @param uValue The immediate value.
7285 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7286 */
7287DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7288{
7289 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7290 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7291 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7292 {
7293 /* Only simple transitions for now. */
7294 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7295 pVar->enmKind = kIemNativeVarKind_Immediate;
7296 }
7297 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7298
7299 pVar->u.uValue = uValue;
7300 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7301 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7302 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7303}
7304
7305
7306/**
7307 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7308 *
7309 * This does not require stack storage as we know the value and can always
7310 * reload it. Loading is postponed till needed.
7311 *
7312 * @param pReNative The recompiler state.
7313 * @param idxVar The variable. Unpacked.
7314 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7315 *
7316 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7317 * @internal
7318 */
7319static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7320{
7321 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7322 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7323
7324 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7325 {
7326 /* Only simple transitions for now. */
7327 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7328 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7329 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7330 }
7331 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7332
7333 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7334
7335 /* Update the other variable, ensure it's a stack variable. */
7336 /** @todo handle variables with const values... that'll go boom now. */
7337 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7338 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7339}
7340
7341
7342/**
7343 * Sets the variable to a reference (pointer) to a guest register reference.
7344 *
7345 * This does not require stack storage as we know the value and can always
7346 * reload it. Loading is postponed till needed.
7347 *
7348 * @param pReNative The recompiler state.
7349 * @param idxVar The variable.
7350 * @param enmRegClass The class of guest registers to reference.
7351 * @param idxReg The register within @a enmRegClass to reference.
7352 *
7353 * @throws VERR_IEM_VAR_IPE_2
7354 */
7355DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7356 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7357{
7358 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7359 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7360
7361 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7362 {
7363 /* Only simple transitions for now. */
7364 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7365 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7366 }
7367 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7368
7369 pVar->u.GstRegRef.enmClass = enmRegClass;
7370 pVar->u.GstRegRef.idx = idxReg;
7371}
7372
7373
7374DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7375{
7376 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7377}
7378
7379
7380DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7381{
7382 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7383
7384 /* Since we're using a generic uint64_t value type, we must truncate it if
7385 the variable is smaller, otherwise we may end up with a too large value
7386 when scaling up an imm8 w/ sign-extension.
7387
7388 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7389 in the bios, bx=1) when running on arm, because clang expects 16-bit
7390 register parameters to have bits 16 and up set to zero. Instead of
7391 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7392 CF value in the result. */
7393 switch (cbType)
7394 {
7395 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7396 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7397 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7398 }
7399 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7400 return idxVar;
7401}
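/*
 * Worked example of the truncation above (sketch): for the "add bx, 0xffff"
 * case mentioned in the comment, the immediate may arrive sign-extended to
 * the full 64 bits; with cbType == sizeof(uint16_t) it is masked down to
 * UINT64_C(0xffff) so that bits 16 and up of the argument register are zero,
 * as the helper expects.
 */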
7402
7403
7404DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7405{
7406 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7407 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7408 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7409 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7410 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7411 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7412
7413 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7414 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7415 return idxArgVar;
7416}
7417
7418
7419DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7420{
7421 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7422 /* Don't set to stack now, leave that to the first use as for instance
7423 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7424 return idxVar;
7425}
7426
7427
7428DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7429{
7430 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7431
7432 /* Since we're using a generic uint64_t value type, we must truncate it if
7433 the variable is smaller, otherwise we may end up with a too large value
7434 when scaling up an imm8 w/ sign-extension. */
7435 switch (cbType)
7436 {
7437 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7438 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7439 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7440 }
7441 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7442 return idxVar;
7443}
7444
7445
7446DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
7447 uint8_t cbType, uint8_t idxVarOther)
7448{
7449 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7450 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7451
7452 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
7453 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7454
7455/** @todo combine MOV and AND using MOVZX/similar. */
7456 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7457
7458 /* Truncate the value to this variable's size. */
7459 switch (cbType)
7460 {
7461 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7462 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7463 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7464 }
7465
7466 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7467 iemNativeVarRegisterRelease(pReNative, idxVar);
7468 return idxVar;
7469}
7470
7471
7472/**
7473 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7474 * fixed till we call iemNativeVarRegisterRelease.
7475 *
7476 * @returns The host register number.
7477 * @param pReNative The recompiler state.
7478 * @param idxVar The variable.
7479 * @param poff Pointer to the instruction buffer offset.
7480 * In case a register needs to be freed up or the value
7481 * loaded off the stack.
7482 * @param fInitialized Set if the variable must already have been
7483 * initialized. Will throw VERR_IEM_VAR_NOT_INITIALIZED
7484 * if this is not the case.
7485 * @param idxRegPref Preferred register number or UINT8_MAX.
7486 *
7487 * @note Must not modify the host status flags!
7488 */
7489DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7490 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7491{
7492 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7493 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7494 Assert(pVar->cbVar <= 8);
7495 Assert(!pVar->fRegAcquired);
7496
7497 uint8_t idxReg = pVar->idxReg;
7498 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7499 {
7500 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7501 && pVar->enmKind < kIemNativeVarKind_End);
7502 pVar->fRegAcquired = true;
7503 return idxReg;
7504 }
7505
7506 /*
7507 * If the kind of variable has not yet been set, default to 'stack'.
7508 */
7509 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7510 && pVar->enmKind < kIemNativeVarKind_End);
7511 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7512 iemNativeVarSetKindToStack(pReNative, idxVar);
7513
7514 /*
7515 * We have to allocate a register for the variable, even if it's a stack one,
7516 * as we don't know if there are modifications being made to it before it's
7517 * finalized (todo: analyze and insert hints about that?).
7518 *
7519 * If we can, we try to get the correct register for argument variables. This
7520 * assumes that most argument variables are fetched as close as possible to
7521 * the actual call, so that there aren't any interfering hidden calls
7522 * (memory accesses, etc.) in between.
7523 *
7524 * If we cannot, or it's a regular (non-argument) variable, we make sure no
7525 * argument registers that will be used by this MC block are allocated here,
7526 * and we always prefer non-volatile registers to avoid having to spill
7527 * stuff for internal calls.
7528 */
7529 /** @todo Detect too early argument value fetches and warn about hidden
7530 * calls causing less optimal code to be generated in the python script. */
7531
7532 uint8_t const uArgNo = pVar->uArgNo;
7533 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7534 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7535 {
7536 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7537
7538#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7539 /* Writeback any dirty shadow registers we are about to unshadow. */
7540 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7541#endif
7542
7543 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7544 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7545 }
7546 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7547 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7548 {
7549 /** @todo there must be a better way for this and boot cArgsX? */
7550 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7551 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7552 & ~pReNative->Core.bmHstRegsWithGstShadow
7553 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7554 & fNotArgsMask;
7555 if (fRegs)
7556 {
7557 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7558 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7559 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7560 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7561 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7562 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7563 }
7564 else
7565 {
7566 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7567 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7568 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7569 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7570 }
7571 }
7572 else
7573 {
7574 idxReg = idxRegPref;
7575 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7576 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7577 }
7578 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7579 pVar->idxReg = idxReg;
7580
7581#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7582 pVar->fSimdReg = false;
7583#endif
7584
7585 /*
7586 * Load it off the stack if we've got a stack slot.
7587 */
7588 uint8_t const idxStackSlot = pVar->idxStackSlot;
7589 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7590 {
7591 Assert(fInitialized);
7592 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7593 switch (pVar->cbVar)
7594 {
7595 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7596 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7597 case 3: AssertFailed(); RT_FALL_THRU();
7598 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7599 default: AssertFailed(); RT_FALL_THRU();
7600 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7601 }
7602 }
7603 else
7604 {
7605 Assert(idxStackSlot == UINT8_MAX);
7606 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7607 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7608 else
7609 {
7610 /*
7611 * Convert from immediate to stack/register. This is currently only
7612 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7613 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7614 */
7615 AssertStmt(fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7616 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7617 idxVar, idxReg, pVar->u.uValue));
7618 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7619 pVar->enmKind = kIemNativeVarKind_Stack;
7620 }
7621 }
7622
7623 pVar->fRegAcquired = true;
7624 return idxReg;
7625}
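/*
 * Typical usage pattern (sketch only, hypothetical call site): acquire the
 * host register, emit whatever needs it, then release it so the allocator
 * may reuse or spill it again.
 */
#if 0 /* sketch only, never compiled */
uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarReg);
iemNativeVarRegisterRelease(pReNative, idxVar);
#endif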
7626
7627
7628#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7629/**
7630 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7631 * fixed till we call iemNativeVarRegisterRelease.
7632 *
7633 * @returns The host register number.
7634 * @param pReNative The recompiler state.
7635 * @param idxVar The variable.
7636 * @param poff Pointer to the instruction buffer offset.
7637 * In case a register needs to be freed up or the value
7638 * loaded off the stack.
7639 * @param fInitialized Set if the variable must already have been initialized.
7640 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7641 * the case.
7642 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7643 */
7644DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7645 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7646{
7647 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7648 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7649 Assert( pVar->cbVar == sizeof(RTUINT128U)
7650 || pVar->cbVar == sizeof(RTUINT256U));
7651 Assert(!pVar->fRegAcquired);
7652
7653 uint8_t idxReg = pVar->idxReg;
7654 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7655 {
7656 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7657 && pVar->enmKind < kIemNativeVarKind_End);
7658 pVar->fRegAcquired = true;
7659 return idxReg;
7660 }
7661
7662 /*
7663 * If the kind of variable has not yet been set, default to 'stack'.
7664 */
7665 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7666 && pVar->enmKind < kIemNativeVarKind_End);
7667 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7668 iemNativeVarSetKindToStack(pReNative, idxVar);
7669
7670 /*
7671 * We have to allocate a register for the variable, even if it's a stack one,
7672 * as we don't know if there are modifications being made to it before it's
7673 * finalized (todo: analyze and insert hints about that?).
7674 *
7675 * If we can, we try to get the correct register for argument variables. This
7676 * assumes that most argument variables are fetched as close as possible to
7677 * the actual call, so that there aren't any interfering hidden calls
7678 * (memory accesses, etc.) in between.
7679 *
7680 * If we cannot, or it's a regular (non-argument) variable, we make sure no
7681 * argument registers that will be used by this MC block are allocated here,
7682 * and we always prefer non-volatile registers to avoid having to spill
7683 * stuff for internal calls.
7684 */
7685 /** @todo Detect too early argument value fetches and warn about hidden
7686 * calls causing less optimal code to be generated in the python script. */
7687
7688 uint8_t const uArgNo = pVar->uArgNo;
7689 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7690
7691 /* SIMD is a bit simpler for now because there is no support for arguments. */
7692 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7693 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7694 {
7695 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7696 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7697 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7698 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7699 & fNotArgsMask;
7700 if (fRegs)
7701 {
7702 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7703 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7704 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7705 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7706 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7707 }
7708 else
7709 {
7710 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7711 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7712 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7713 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7714 }
7715 }
7716 else
7717 {
7718 idxReg = idxRegPref;
7719 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7720 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7721 }
7722 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7723
7724 pVar->fSimdReg = true;
7725 pVar->idxReg = idxReg;
7726
7727 /*
7728 * Load it off the stack if we've got a stack slot.
7729 */
7730 uint8_t const idxStackSlot = pVar->idxStackSlot;
7731 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7732 {
7733 Assert(fInitialized);
7734 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7735 switch (pVar->cbVar)
7736 {
7737 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7738 default: AssertFailed(); RT_FALL_THRU();
7739 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7740 }
7741 }
7742 else
7743 {
7744 Assert(idxStackSlot == UINT8_MAX);
7745 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7746 }
7747 pVar->fRegAcquired = true;
7748 return idxReg;
7749}
7750#endif
7751
7752
7753/**
7754 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7755 * guest register.
7756 *
7757 * This function makes sure there is a register for it and sets it to be the
7758 * current shadow copy of @a enmGstReg.
7759 *
7760 * @returns The host register number.
7761 * @param pReNative The recompiler state.
7762 * @param idxVar The variable.
7763 * @param enmGstReg The guest register this variable will be written to
7764 * after this call.
7765 * @param poff Pointer to the instruction buffer offset.
7766 * In case a register needs to be freed up or if the
7767 * variable content needs to be loaded off the stack.
7768 *
7769 * @note We DO NOT expect @a idxVar to be an argument variable,
7770 * because this function is only used in the commit stage of an
7771 * instruction.
7772 */
7773DECL_HIDDEN_THROW(uint8_t)
7774iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7775{
7776 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7777 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7778 Assert(!pVar->fRegAcquired);
7779 AssertMsgStmt( pVar->cbVar <= 8
7780 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7781 || pVar->enmKind == kIemNativeVarKind_Stack),
7782 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7783 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7784 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7785
7786 /*
7787 * This shouldn't ever be used for arguments, unless it's in a weird else
7788 * branch that doesn't do any calling and even then it's questionable.
7789 *
7790 * However, in case someone writes crazy wrong MC code and does register
7791 * updates before making calls, just use the regular register allocator to
7792 * ensure we get a register suitable for the intended argument number.
7793 */
7794 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7795
7796 /*
7797 * If there is already a register for the variable, we transfer/set the
7798 * guest shadow copy assignment to it.
7799 */
7800 uint8_t idxReg = pVar->idxReg;
7801 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7802 {
7803#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7804 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7805 {
7806# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7807 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7808 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7809# endif
7810 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7811 }
7812#endif
7813
7814 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7815 {
7816 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7817 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7818 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7819 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7820 }
7821 else
7822 {
7823 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7824 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7825 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7826 }
7827 /** @todo figure this one out. We need some way of making sure the register isn't
7828 * modified after this point, just in case we start writing crappy MC code. */
7829 pVar->enmGstReg = enmGstReg;
7830 pVar->fRegAcquired = true;
7831 return idxReg;
7832 }
7833 Assert(pVar->uArgNo == UINT8_MAX);
7834
7835 /*
7836 * Because this is supposed to be the commit stage, we just tag along with the
7837 * temporary register allocator and upgrade the register to a variable register.
7838 */
7839 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7840 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7841 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7842 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7843 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7844 pVar->idxReg = idxReg;
7845
7846 /*
7847 * Now we need to load the register value.
7848 */
7849 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7850 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7851 else
7852 {
7853 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7854 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7855 switch (pVar->cbVar)
7856 {
7857 case sizeof(uint64_t):
7858 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7859 break;
7860 case sizeof(uint32_t):
7861 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7862 break;
7863 case sizeof(uint16_t):
7864 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7865 break;
7866 case sizeof(uint8_t):
7867 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7868 break;
7869 default:
7870 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7871 }
7872 }
7873
7874 pVar->fRegAcquired = true;
7875 return idxReg;
7876}
7877
7878
7879/**
7880 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7881 *
7882 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7883 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7884 * requirement of flushing anything in volatile host registers when making a
7885 * call.
7886 *
7887 * @returns New @a off value.
7888 * @param pReNative The recompiler state.
7889 * @param off The code buffer position.
7890 * @param fHstRegsNotToSave Set of registers not to save & restore.
7891 */
7892DECL_HIDDEN_THROW(uint32_t)
7893iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7894{
7895 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
7896 if (fHstRegs)
7897 {
7898 do
7899 {
7900 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7901 fHstRegs &= ~RT_BIT_32(idxHstReg);
7902
7903 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7904 {
7905 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7906 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7907 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7908 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7909 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7910 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7911 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7912 {
7913 case kIemNativeVarKind_Stack:
7914 {
7915 /* Temporarily spill the variable register. */
7916 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7917 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7918 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7919 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7920 continue;
7921 }
7922
7923 case kIemNativeVarKind_Immediate:
7924 case kIemNativeVarKind_VarRef:
7925 case kIemNativeVarKind_GstRegRef:
7926 /* It is weird to have any of these loaded at this point. */
7927 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7928 continue;
7929
7930 case kIemNativeVarKind_End:
7931 case kIemNativeVarKind_Invalid:
7932 break;
7933 }
7934 AssertFailed();
7935 }
7936 else
7937 {
7938 /*
7939 * Allocate a temporary stack slot and spill the register to it.
7940 */
7941 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7942 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7943 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7944 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7945 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7946 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7947 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7948 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7949 }
7950 } while (fHstRegs);
7951 }
7952#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7953
7954 /*
7955 * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot
7956 * allocated, which would be more difficult due to them spanning multiple stack slots and
7957 * having different sizes (besides, we only have a limited number of slots at the moment).
7958 *
7959 * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted
7960 * by the callee. This asserts that the registers were written back earlier and are not in the dirty state.
7961 */
7962 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7963
7964 fHstRegs = pReNative->Core.bmHstSimdRegs & (IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
7965 if (fHstRegs)
7966 {
7967 do
7968 {
7969 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7970 fHstRegs &= ~RT_BIT_32(idxHstReg);
7971
7972 /* Fixed reserved and temporary registers don't need saving. */
7973 /*Assert( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedReserved
7974 && pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedTmp); included below */
7975 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7976
7977 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7978 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7979 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7980 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7981 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7982 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7983 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7984 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7985 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7986 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7987 {
7988 case kIemNativeVarKind_Stack:
7989 {
7990 /* Temporarily spill the variable register. */
7991 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7992 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7993 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7994 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7995 if (cbVar == sizeof(RTUINT128U))
7996 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7997 else
7998 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7999 continue;
8000 }
8001
8002 case kIemNativeVarKind_Immediate:
8003 case kIemNativeVarKind_VarRef:
8004 case kIemNativeVarKind_GstRegRef:
8005 /* It is weird to have any of these loaded at this point. */
8006 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8007 continue;
8008
8009 case kIemNativeVarKind_End:
8010 case kIemNativeVarKind_Invalid:
8011 break;
8012 }
8013 AssertFailed();
8014 } while (fHstRegs);
8015 }
8016#endif
8017 return off;
8018}
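/*
 * Sketch of how this pairs with the restore function below around a TLB miss
 * helper call (hypothetical call site; pfnTlbMissHelper is made up, and real
 * sites may also use iemNativeRegRestoreGuestShadowsInVolatileRegs):
 */
#if 0 /* sketch only, never compiled */
off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnTlbMissHelper);
off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
#endif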
8019
8020
8021/**
8022 * Emit code to restore volatile registers after a call to a helper.
8023 *
8024 * @returns New @a off value.
8025 * @param pReNative The recompiler state.
8026 * @param off The code buffer position.
8027 * @param fHstRegsNotToSave Set of registers not to save & restore.
8028 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8029 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8030 */
8031DECL_HIDDEN_THROW(uint32_t)
8032iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8033{
8034 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
8035 if (fHstRegs)
8036 {
8037 do
8038 {
8039 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8040 fHstRegs &= ~RT_BIT_32(idxHstReg);
8041
8042 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8043 {
8044 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8045 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8046 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8047 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8048 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8049 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8050 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8051 {
8052 case kIemNativeVarKind_Stack:
8053 {
8054 /* Unspill the variable register. */
8055 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8056 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8057 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8058 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8059 continue;
8060 }
8061
8062 case kIemNativeVarKind_Immediate:
8063 case kIemNativeVarKind_VarRef:
8064 case kIemNativeVarKind_GstRegRef:
8065 /* It is weird to have any of these loaded at this point. */
8066 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8067 continue;
8068
8069 case kIemNativeVarKind_End:
8070 case kIemNativeVarKind_Invalid:
8071 break;
8072 }
8073 AssertFailed();
8074 }
8075 else
8076 {
8077 /*
8078 * Restore from temporary stack slot.
8079 */
8080 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8081 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8082 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8083 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8084
8085 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8086 }
8087 } while (fHstRegs);
8088 }
8089#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8090 fHstRegs = pReNative->Core.bmHstSimdRegs & (IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
8091 if (fHstRegs)
8092 {
8093 do
8094 {
8095 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8096 fHstRegs &= ~RT_BIT_32(idxHstReg);
8097
8098 /*Assert( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedTmp
8099 && pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedReserved); - included below. */
8100 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8101
8102 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8103 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8104 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8105 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8106 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8107 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8108 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8109 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8110 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8111 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8112 {
8113 case kIemNativeVarKind_Stack:
8114 {
8115 /* Unspill the variable register. */
8116 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8117 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8118 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8119 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8120
8121 if (cbVar == sizeof(RTUINT128U))
8122 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8123 else
8124 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8125 continue;
8126 }
8127
8128 case kIemNativeVarKind_Immediate:
8129 case kIemNativeVarKind_VarRef:
8130 case kIemNativeVarKind_GstRegRef:
8131 /* It is weird to have any of these loaded at this point. */
8132 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8133 continue;
8134
8135 case kIemNativeVarKind_End:
8136 case kIemNativeVarKind_Invalid:
8137 break;
8138 }
8139 AssertFailed();
8140 } while (fHstRegs);
8141 }
8142#endif
8143 return off;
8144}
8145
8146
8147/**
8148 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8149 *
8150 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8151 *
8152 * ASSUMES that @a idxVar is valid and unpacked.
8153 */
8154DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8155{
8156 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8157 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8158 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8159 {
8160 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8161 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8162 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
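     /* Worked example: a 32 byte RTUINT256U variable gives cSlots = (32 + 7) / 8 = 4 and fAllocMask = 0xf,
        i.e. four consecutive qword stack slots starting at idxStackSlot are released below. */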
8163 Assert(cSlots > 0);
8164 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8165 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8166 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8167 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8168 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8169 }
8170 else
8171 Assert(idxStackSlot == UINT8_MAX);
8172}
8173
8174
8175/**
8176 * Worker that frees a single variable.
8177 *
8178 * ASSUMES that @a idxVar is valid and unpacked.
8179 */
8180DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8181{
8182 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8183 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8184 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8185
8186 /* Free the host register first if any assigned. */
8187 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8188#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8189 if ( idxHstReg != UINT8_MAX
8190 && pReNative->Core.aVars[idxVar].fSimdReg)
8191 {
8192 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8193 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8194 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8195 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8196 }
8197 else
8198#endif
8199 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8200 {
8201 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8202 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8203 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8204 }
8205
8206 /* Free argument mapping. */
8207 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8208 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8209 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8210
8211 /* Free the stack slots. */
8212 iemNativeVarFreeStackSlots(pReNative, idxVar);
8213
8214 /* Free the actual variable. */
8215 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8216 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8217}
8218
8219
8220/**
8221 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8222 */
8223DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8224{
8225 while (bmVars != 0)
8226 {
8227 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8228 bmVars &= ~RT_BIT_32(idxVar);
8229
8230#if 1 /** @todo optimize by simplifying this later... */
8231 iemNativeVarFreeOneWorker(pReNative, idxVar);
8232#else
8233 /* Only need to free the host register, the rest is done as bulk updates below. */
8234 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8235 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8236 {
8237 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8238 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8239 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8240 }
8241#endif
8242 }
8243#if 0 /** @todo optimize by simplifying this later... */
8244 pReNative->Core.bmVars = 0;
8245 pReNative->Core.bmStack = 0;
8246 pReNative->Core.u64ArgVars = UINT64_MAX;
8247#endif
8248}
8249
8250
8251
8252/*********************************************************************************************************************************
8253* Emitters for IEM_MC_CALL_CIMPL_XXX *
8254*********************************************************************************************************************************/
8255
8256/**
8257 * Emits code to load a reference to the given guest register into @a idxGprDst.
8258 */
8259DECL_HIDDEN_THROW(uint32_t)
8260iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8261 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8262{
8263#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8264 /** @todo If we are ever going to allow referencing the RIP register, we need to update the guest value here. */
8265#endif
8266
8267 /*
8268 * Get the offset relative to the CPUMCTX structure.
8269 */
8270 uint32_t offCpumCtx;
8271 switch (enmClass)
8272 {
8273 case kIemNativeGstRegRef_Gpr:
8274 Assert(idxRegInClass < 16);
8275 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8276 break;
8277
8278 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
8279 Assert(idxRegInClass < 4);
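     /* AH/CH/DH/BH live in the second byte of GPRs 0..3, so take the bHi offset of the first
        GPR and add the per-register stride. */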
8280 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8281 break;
8282
8283 case kIemNativeGstRegRef_EFlags:
8284 Assert(idxRegInClass == 0);
8285 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8286 break;
8287
8288 case kIemNativeGstRegRef_MxCsr:
8289 Assert(idxRegInClass == 0);
8290 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8291 break;
8292
8293 case kIemNativeGstRegRef_FpuReg:
8294 Assert(idxRegInClass < 8);
8295 AssertFailed(); /** @todo what kind of indexing? */
8296 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8297 break;
8298
8299 case kIemNativeGstRegRef_MReg:
8300 Assert(idxRegInClass < 8);
8301 AssertFailed(); /** @todo what kind of indexing? */
8302 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8303 break;
8304
8305 case kIemNativeGstRegRef_XReg:
8306 Assert(idxRegInClass < 16);
8307 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8308 break;
8309
8310 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8311 Assert(idxRegInClass == 0);
8312 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8313 break;
8314
8315 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8316 Assert(idxRegInClass == 0);
8317 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8318 break;
8319
8320 default:
8321 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8322 }
8323
8324 /*
8325 * Load the value into the destination register.
8326 */
8327#ifdef RT_ARCH_AMD64
8328 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8329
8330#elif defined(RT_ARCH_ARM64)
8331 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8332 Assert(offCpumCtx < 4096);
8333 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8334
8335#else
8336# error "Port me!"
8337#endif
8338
8339 return off;
8340}
8341
8342
8343/**
8344 * Common code for CIMPL and AIMPL calls.
8345 *
8346 * These are calls that use argument variables and such. They should not be
8347 * confused with internal calls required to implement an MC operation,
8348 * like a TLB load and similar.
8349 *
8350 * Upon return all that is left to do is to load any hidden arguments and
8351 * perform the call. All argument variables are freed.
8352 *
8353 * @returns New code buffer offset; throws VBox status code on error.
8354 * @param pReNative The native recompile state.
8355 * @param off The code buffer offset.
8356 * @param cArgs The total number of arguments (includes hidden
8357 * count).
8358 * @param cHiddenArgs The number of hidden arguments. The hidden
8359 * arguments must not have any variable declared for
8360 * them, whereas all the regular arguments must
8361 * (tstIEMCheckMc ensures this).
8362 * @param fFlushPendingWrites Whether to flush pending writes (default true).
8363 * Pending writes in call volatile registers are still flushed even when false.
8364 */
8365DECL_HIDDEN_THROW(uint32_t)
8366iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8367 bool fFlushPendingWrites /*= true*/)
8368{
8369#ifdef VBOX_STRICT
8370 /*
8371 * Assert sanity.
8372 */
8373 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8374 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8375 for (unsigned i = 0; i < cHiddenArgs; i++)
8376 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8377 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8378 {
8379 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8380 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8381 }
8382 iemNativeRegAssertSanity(pReNative);
8383#endif
8384
8385 /* We don't know what the called function makes use of, so flush any pending register writes. */
8386 RT_NOREF(fFlushPendingWrites);
8387#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8388 if (fFlushPendingWrites)
8389#endif
8390 off = iemNativeRegFlushPendingWrites(pReNative, off);
8391
8392 /*
8393 * Before we do anything else, go over variables that are referenced and
8394 * make sure they are not in a register.
8395 */
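     /* (Referenced variables end up being passed by stack address (kIemNativeVarKind_VarRef),
         so they must sit in their stack slot for that pointer to stay valid across the call.) */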
8396 uint32_t bmVars = pReNative->Core.bmVars;
8397 if (bmVars)
8398 {
8399 do
8400 {
8401 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8402 bmVars &= ~RT_BIT_32(idxVar);
8403
8404 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8405 {
8406 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8407#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8408 if ( idxRegOld != UINT8_MAX
8409 && pReNative->Core.aVars[idxVar].fSimdReg)
8410 {
8411 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8412 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8413
8414 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8415 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8416 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8417 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8418 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8419 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8420 else
8421 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8422
8423 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8424 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8425
8426 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8427 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8428 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8429 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8430 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8431 }
8432 else
8433#endif
8434 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8435 {
8436 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8437 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8438 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8439 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8440 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8441
8442 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8443 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8444 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8445 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8446 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8447 }
8448 }
8449 } while (bmVars != 0);
8450#if 0 //def VBOX_STRICT
8451 iemNativeRegAssertSanity(pReNative);
8452#endif
8453 }
8454
8455 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8456
8457#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8458 /*
8459 * At the very first step, go over the host registers that will be used for arguments
8460 * and make sure they don't shadow anything which needs writing back first.
8461 */
8462 for (uint32_t i = 0; i < cRegArgs; i++)
8463 {
8464 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8465
8466 /* Writeback any dirty guest shadows before using this register. */
8467 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8468 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8469 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8470 }
8471#endif
8472
8473 /*
8474 * First, go over the host registers that will be used for arguments and make
8475 * sure they either hold the desired argument or are free.
8476 */
8477 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8478 {
8479 for (uint32_t i = 0; i < cRegArgs; i++)
8480 {
8481 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8482 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8483 {
8484 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8485 {
8486 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8487 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8488 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8489 Assert(pVar->idxReg == idxArgReg);
8490 uint8_t const uArgNo = pVar->uArgNo;
8491 if (uArgNo == i)
8492                 { /* perfect */ }
8493 /* The variable allocator logic should make sure this is impossible,
8494 except for when the return register is used as a parameter (ARM,
8495 but not x86). */
8496#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8497 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8498 {
8499# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8500# error "Implement this"
8501# endif
8502 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8503 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8504 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8505 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8506 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8507 }
8508#endif
8509 else
8510 {
8511 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8512
8513 if (pVar->enmKind == kIemNativeVarKind_Stack)
8514 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8515 else
8516 {
8517 /* just free it, can be reloaded if used again */
8518 pVar->idxReg = UINT8_MAX;
8519 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8520 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8521 }
8522 }
8523 }
8524 else
8525 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8526 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8527 }
8528 }
8529#if 0 //def VBOX_STRICT
8530 iemNativeRegAssertSanity(pReNative);
8531#endif
8532 }
8533
8534 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8535
8536#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8537 /*
8538 * If there are any stack arguments, make sure they are in their place as well.
8539 *
8540 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8541 * the caller) will be loading it later and it must be free (see the first loop).
8542 */
8543 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8544 {
8545 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8546 {
8547 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8548 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8549 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8550 {
8551 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8552 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8553 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8554 pVar->idxReg = UINT8_MAX;
8555 }
8556 else
8557 {
8558 /* Use ARG0 as temp for stuff we need registers for. */
8559 switch (pVar->enmKind)
8560 {
8561 case kIemNativeVarKind_Stack:
8562 {
8563 uint8_t const idxStackSlot = pVar->idxStackSlot;
8564 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8565 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8566 iemNativeStackCalcBpDisp(idxStackSlot));
8567 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8568 continue;
8569 }
8570
8571 case kIemNativeVarKind_Immediate:
8572 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8573 continue;
8574
8575 case kIemNativeVarKind_VarRef:
8576 {
8577 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8578 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8579 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8580 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8581 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8582# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8583 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8584 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8585 if ( fSimdReg
8586 && idxRegOther != UINT8_MAX)
8587 {
8588 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8589 if (cbVar == sizeof(RTUINT128U))
8590 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8591 else
8592 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8593 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8594 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8595 }
8596 else
8597# endif
8598 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8599 {
8600 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8601 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8602 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8603 }
8604 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8605 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8606 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8607 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8608 continue;
8609 }
8610
8611 case kIemNativeVarKind_GstRegRef:
8612 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8613 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8614 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8615 continue;
8616
8617 case kIemNativeVarKind_Invalid:
8618 case kIemNativeVarKind_End:
8619 break;
8620 }
8621 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8622 }
8623 }
8624# if 0 //def VBOX_STRICT
8625 iemNativeRegAssertSanity(pReNative);
8626# endif
8627 }
8628#else
8629 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8630#endif
8631
8632 /*
8633 * Make sure the argument variables are loaded into their respective registers.
8634 *
8635 * We can optimize this by ASSUMING that any register allocations are for
8636 * registers that have already been loaded and are ready. The previous step
8637 * saw to that.
8638 */
8639 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8640 {
8641 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8642 {
8643 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8644 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8645 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8646 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8647 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8648 else
8649 {
8650 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8651 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8652 {
8653 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8654 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8655 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8656 | RT_BIT_32(idxArgReg);
8657 pVar->idxReg = idxArgReg;
8658 }
8659 else
8660 {
8661 /* Use ARG0 as temp for stuff we need registers for. */
8662 switch (pVar->enmKind)
8663 {
8664 case kIemNativeVarKind_Stack:
8665 {
8666 uint8_t const idxStackSlot = pVar->idxStackSlot;
8667 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8668 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8669 continue;
8670 }
8671
8672 case kIemNativeVarKind_Immediate:
8673 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8674 continue;
8675
8676 case kIemNativeVarKind_VarRef:
8677 {
8678 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8679 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8680 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8681 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8682 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8683 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8684#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8685 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8686 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8687 if ( fSimdReg
8688 && idxRegOther != UINT8_MAX)
8689 {
8690 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8691 if (cbVar == sizeof(RTUINT128U))
8692 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8693 else
8694 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8695 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8696 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8697 }
8698 else
8699#endif
8700 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8701 {
8702 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8703 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8704 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8705 }
8706 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8707 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8708 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8709 continue;
8710 }
8711
8712 case kIemNativeVarKind_GstRegRef:
8713 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8714 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8715 continue;
8716
8717 case kIemNativeVarKind_Invalid:
8718 case kIemNativeVarKind_End:
8719 break;
8720 }
8721 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8722 }
8723 }
8724 }
8725#if 0 //def VBOX_STRICT
8726 iemNativeRegAssertSanity(pReNative);
8727#endif
8728 }
8729#ifdef VBOX_STRICT
8730 else
8731 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8732 {
8733 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8734 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8735 }
8736#endif
8737
8738 /*
8739 * Free all argument variables (simplified).
8740 * Their lifetime always expires with the call they are for.
8741 */
8742 /** @todo Make the python script check that arguments aren't used after
8743 * IEM_MC_CALL_XXXX. */
8744 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8745 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8746 * an argument value. There is also some FPU stuff. */
8747 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8748 {
8749 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8750 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8751
8752 /* no need to free registers: */
8753 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8754 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8755 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8756 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8757 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8758 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8759
8760 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8761 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8762 iemNativeVarFreeStackSlots(pReNative, idxVar);
8763 }
8764 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8765
8766 /*
8767 * Flush volatile registers as we make the call.
8768 */
8769 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8770
8771 return off;
8772}
8773
8774
8775
8776/*********************************************************************************************************************************
8777* TLB Lookup. *
8778*********************************************************************************************************************************/
8779
8780/**
8781 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8782 */
8783DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint64_t uSegAndSizeAndAccessAndDisp)
8784{
8785 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccessAndDisp);
8786 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccessAndDisp);
8787 uint32_t const fAccess = (uint32_t)uSegAndSizeAndAccessAndDisp >> 16;
8788 uint8_t const offDisp = RT_BYTE5(uSegAndSizeAndAccessAndDisp);
8789 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64+%#x LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, offDisp, cbMem, fAccess, uResult));
8790
8791 /* Do the lookup manually. */
8792 RTGCPTR const GCPtrFlat = (iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base) + offDisp;
8793 uint64_t const uTagNoRev = IEMTLB_CALC_TAG_NO_REV(GCPtrFlat);
8794 PCIEMTLBENTRY pTlbe = IEMTLB_TAG_TO_EVEN_ENTRY(&pVCpu->iem.s.DataTlb, uTagNoRev);
8795 if (RT_LIKELY( pTlbe->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevision)
8796 || (pTlbe = pTlbe + 1)->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevisionGlobal)))
8797 {
8798 /*
8799 * Check TLB page table level access flags.
8800 */
8801 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
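         /* Only CPL 3 yields (3 + 1) & 4 = IEMTLBE_F_PT_NO_USER here; CPL 0 thru 2 give 0, so
            supervisor accesses ignore the no-user bit. */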
8802 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8803 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8804 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8805 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8806 | IEMTLBE_F_PG_UNASSIGNED
8807 | IEMTLBE_F_PT_NO_ACCESSED
8808 | fNoWriteNoDirty | fNoUser);
8809 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8810 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8811 {
8812 /*
8813 * Return the address.
8814 */
8815 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8816 if ((uintptr_t)pbAddr == uResult)
8817 return;
8818 RT_NOREF(cbMem);
8819 AssertFailed();
8820 }
8821 else
8822 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8823 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8824 }
8825 else
8826 AssertFailed();
8827 RT_BREAKPOINT();
8828}
8829
8830/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8831
8832
8833
8834/*********************************************************************************************************************************
8835* Recompiler Core. *
8836*********************************************************************************************************************************/
8837
8838/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8839static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8840{
8841 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8842 pDis->cbCachedInstr += cbMaxRead;
8843 RT_NOREF(cbMinRead);
8844 return VERR_NO_DATA;
8845}
8846
8847
8848DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8849{
8850 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8851 {
8852#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8853 ENTRY(fLocalForcedActions),
8854 ENTRY(iem.s.rcPassUp),
8855 ENTRY(iem.s.fExec),
8856 ENTRY(iem.s.pbInstrBuf),
8857 ENTRY(iem.s.uInstrBufPc),
8858 ENTRY(iem.s.GCPhysInstrBuf),
8859 ENTRY(iem.s.cbInstrBufTotal),
8860 ENTRY(iem.s.idxTbCurInstr),
8861 ENTRY(iem.s.fSkippingEFlags),
8862#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
8863 ENTRY(iem.s.uPcUpdatingDebug),
8864#endif
8865#ifdef VBOX_WITH_STATISTICS
8866 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8867 ENTRY(iem.s.StatNativeTlbHitsForStore),
8868 ENTRY(iem.s.StatNativeTlbHitsForStack),
8869 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8870 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8871 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8872 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8873 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8874#endif
8875 ENTRY(iem.s.DataTlb.uTlbRevision),
8876 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8877 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8878 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8879 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8880 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8881 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8882 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8883 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8884 ENTRY(iem.s.DataTlb.aEntries),
8885 ENTRY(iem.s.CodeTlb.uTlbRevision),
8886 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8887 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8888 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8889 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8890 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8891 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8892 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8893 ENTRY(iem.s.CodeTlb.aEntries),
8894 ENTRY(pVMR3),
8895 ENTRY(cpum.GstCtx.rax),
8896 ENTRY(cpum.GstCtx.ah),
8897 ENTRY(cpum.GstCtx.rcx),
8898 ENTRY(cpum.GstCtx.ch),
8899 ENTRY(cpum.GstCtx.rdx),
8900 ENTRY(cpum.GstCtx.dh),
8901 ENTRY(cpum.GstCtx.rbx),
8902 ENTRY(cpum.GstCtx.bh),
8903 ENTRY(cpum.GstCtx.rsp),
8904 ENTRY(cpum.GstCtx.rbp),
8905 ENTRY(cpum.GstCtx.rsi),
8906 ENTRY(cpum.GstCtx.rdi),
8907 ENTRY(cpum.GstCtx.r8),
8908 ENTRY(cpum.GstCtx.r9),
8909 ENTRY(cpum.GstCtx.r10),
8910 ENTRY(cpum.GstCtx.r11),
8911 ENTRY(cpum.GstCtx.r12),
8912 ENTRY(cpum.GstCtx.r13),
8913 ENTRY(cpum.GstCtx.r14),
8914 ENTRY(cpum.GstCtx.r15),
8915 ENTRY(cpum.GstCtx.es.Sel),
8916 ENTRY(cpum.GstCtx.es.u64Base),
8917 ENTRY(cpum.GstCtx.es.u32Limit),
8918 ENTRY(cpum.GstCtx.es.Attr),
8919 ENTRY(cpum.GstCtx.cs.Sel),
8920 ENTRY(cpum.GstCtx.cs.u64Base),
8921 ENTRY(cpum.GstCtx.cs.u32Limit),
8922 ENTRY(cpum.GstCtx.cs.Attr),
8923 ENTRY(cpum.GstCtx.ss.Sel),
8924 ENTRY(cpum.GstCtx.ss.u64Base),
8925 ENTRY(cpum.GstCtx.ss.u32Limit),
8926 ENTRY(cpum.GstCtx.ss.Attr),
8927 ENTRY(cpum.GstCtx.ds.Sel),
8928 ENTRY(cpum.GstCtx.ds.u64Base),
8929 ENTRY(cpum.GstCtx.ds.u32Limit),
8930 ENTRY(cpum.GstCtx.ds.Attr),
8931 ENTRY(cpum.GstCtx.fs.Sel),
8932 ENTRY(cpum.GstCtx.fs.u64Base),
8933 ENTRY(cpum.GstCtx.fs.u32Limit),
8934 ENTRY(cpum.GstCtx.fs.Attr),
8935 ENTRY(cpum.GstCtx.gs.Sel),
8936 ENTRY(cpum.GstCtx.gs.u64Base),
8937 ENTRY(cpum.GstCtx.gs.u32Limit),
8938 ENTRY(cpum.GstCtx.gs.Attr),
8939 ENTRY(cpum.GstCtx.rip),
8940 ENTRY(cpum.GstCtx.eflags),
8941 ENTRY(cpum.GstCtx.uRipInhibitInt),
8942 ENTRY(cpum.GstCtx.cr0),
8943 ENTRY(cpum.GstCtx.cr4),
8944 ENTRY(cpum.GstCtx.aXcr[0]),
8945 ENTRY(cpum.GstCtx.aXcr[1]),
8946#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8947 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8948 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8949 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8950 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8951 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8952 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8953 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8954 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8955 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8956 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8957 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8958 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8959 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8960 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8961 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8962 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8963 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8964 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8965 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8966 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8967 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8968 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8969 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8970 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8971 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8972 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8973 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8974 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8975 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8976 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8977 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8978 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8979 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8980#endif
8981#undef ENTRY
8982 };
8983#ifdef VBOX_STRICT
8984 static bool s_fOrderChecked = false;
8985 if (!s_fOrderChecked)
8986 {
8987 s_fOrderChecked = true;
8988 uint32_t offPrev = s_aMembers[0].off;
8989 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8990 {
8991 Assert(s_aMembers[i].off > offPrev);
8992 offPrev = s_aMembers[i].off;
8993 }
8994 }
8995#endif
8996
8997 /*
8998 * Binary lookup.
8999 */
9000 unsigned iStart = 0;
9001 unsigned iEnd = RT_ELEMENTS(s_aMembers);
9002 for (;;)
9003 {
9004 unsigned const iCur = iStart + (iEnd - iStart) / 2;
9005 uint32_t const offCur = s_aMembers[iCur].off;
9006 if (off < offCur)
9007 {
9008 if (iCur != iStart)
9009 iEnd = iCur;
9010 else
9011 break;
9012 }
9013 else if (off > offCur)
9014 {
9015 if (iCur + 1 < iEnd)
9016 iStart = iCur + 1;
9017 else
9018 break;
9019 }
9020 else
9021 return s_aMembers[iCur].pszName;
9022 }
9023#ifdef VBOX_WITH_STATISTICS
9024 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9025 return "iem.s.acThreadedFuncStats[iFn]";
9026#endif
9027 return NULL;
9028}
9029
9030
9031/**
9032 * Translates a label to a name.
9033 */
9034static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode /*= false*/)
9035{
9036 switch (enmLabel)
9037 {
9038#define STR_CASE_CMN(a_Label) case kIemNativeLabelType_ ## a_Label: return fCommonCode ? "Chunk_" #a_Label : #a_Label;
9039 STR_CASE_CMN(Invalid);
9040 STR_CASE_CMN(RaiseDe);
9041 STR_CASE_CMN(RaiseUd);
9042 STR_CASE_CMN(RaiseSseRelated);
9043 STR_CASE_CMN(RaiseAvxRelated);
9044 STR_CASE_CMN(RaiseSseAvxFpRelated);
9045 STR_CASE_CMN(RaiseNm);
9046 STR_CASE_CMN(RaiseGp0);
9047 STR_CASE_CMN(RaiseMf);
9048 STR_CASE_CMN(RaiseXf);
9049 STR_CASE_CMN(ObsoleteTb);
9050 STR_CASE_CMN(NeedCsLimChecking);
9051 STR_CASE_CMN(CheckBranchMiss);
9052 STR_CASE_CMN(ReturnSuccess);
9053 STR_CASE_CMN(ReturnBreak);
9054 STR_CASE_CMN(ReturnBreakFF);
9055 STR_CASE_CMN(ReturnWithFlags);
9056 STR_CASE_CMN(ReturnBreakViaLookup);
9057 STR_CASE_CMN(ReturnBreakViaLookupWithIrq);
9058 STR_CASE_CMN(ReturnBreakViaLookupWithTlb);
9059 STR_CASE_CMN(ReturnBreakViaLookupWithTlbAndIrq);
9060 STR_CASE_CMN(NonZeroRetOrPassUp);
9061#undef STR_CASE_CMN
9062#define STR_CASE_LBL(a_Label) case kIemNativeLabelType_ ## a_Label: return #a_Label;
9063 STR_CASE_LBL(LoopJumpTarget);
9064 STR_CASE_LBL(If);
9065 STR_CASE_LBL(Else);
9066 STR_CASE_LBL(Endif);
9067 STR_CASE_LBL(CheckIrq);
9068 STR_CASE_LBL(TlbLookup);
9069 STR_CASE_LBL(TlbMiss);
9070 STR_CASE_LBL(TlbDone);
9071 case kIemNativeLabelType_End: break;
9072 }
9073 return NULL;
9074}
9075
9076
9077/** Info for the symbols resolver used when disassembling. */
9078typedef struct IEMNATIVDISASMSYMCTX
9079{
9080 PVMCPU pVCpu;
9081 PCIEMTB pTb;
9082 PCIEMNATIVEPERCHUNKCTX pCtx;
9083#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9084 PCIEMTBDBG pDbgInfo;
9085#endif
9086} IEMNATIVDISASMSYMCTX;
9087typedef IEMNATIVDISASMSYMCTX *PIEMNATIVDISASMSYMCTX;
9088
9089
9090/**
9091 * Resolve address to symbol, if we can.
9092 */
9093static const char *iemNativeDisasmGetSymbol(PIEMNATIVDISASMSYMCTX pSymCtx, uintptr_t uAddress, char *pszBuf, size_t cbBuf)
9094{
9095 PCIEMTB const pTb = pSymCtx->pTb;
9096 uintptr_t const offNative = (uAddress - (uintptr_t)pTb->Native.paInstructions) / sizeof(IEMNATIVEINSTR);
9097 if (offNative <= pTb->Native.cInstructions)
9098 {
9099#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9100 /*
9101 * Scan debug info for a matching label.
9102 * Since the debug info should be 100% linear, we can do a binary search here.
9103 */
9104 PCIEMTBDBG const pDbgInfo = pSymCtx->pDbgInfo;
9105 if (pDbgInfo)
9106 {
9107 uint32_t const cEntries = pDbgInfo->cEntries;
9108 uint32_t idxEnd = cEntries;
9109 uint32_t idxStart = 0;
9110 for (;;)
9111 {
9112 /* Find a NativeOffset record close to the midpoint. */
9113 uint32_t idx = idxStart + (idxEnd - idxStart) / 2;
9114 while (idx > idxStart && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9115 idx--;
9116 if (pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9117 {
9118 idx = idxStart + (idxEnd - idxStart) / 2 + 1;
9119 while (idx < idxEnd && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9120 idx++;
9121 if (idx >= idxEnd)
9122 break;
9123 }
9124
9125 /* Do the binary searching thing. */
9126 if (offNative < pDbgInfo->aEntries[idx].NativeOffset.offNative)
9127 {
9128 if (idx > idxStart)
9129 idxEnd = idx;
9130 else
9131 break;
9132 }
9133 else if (offNative > pDbgInfo->aEntries[idx].NativeOffset.offNative)
9134 {
9135 idx += 1;
9136 if (idx < idxEnd)
9137 idxStart = idx;
9138 else
9139 break;
9140 }
9141 else
9142 {
9143 /* Got a matching offset, scan forward till we hit a label, but
9144 stop when the native offset changes. */
9145 while (++idx < cEntries)
9146 switch (pDbgInfo->aEntries[idx].Gen.uType)
9147 {
9148 case kIemTbDbgEntryType_Label:
9149 {
9150 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)pDbgInfo->aEntries[idx].Label.enmLabel;
9151 const char * const pszName = iemNativeGetLabelName(enmLabel);
9152 if (enmLabel < kIemNativeLabelType_FirstWithMultipleInstances)
9153 return pszName;
9154 RTStrPrintf(pszBuf, cbBuf, "%s_%u", pszName, pDbgInfo->aEntries[idx].Label.uData);
9155 return pszBuf;
9156 }
9157
9158 case kIemTbDbgEntryType_NativeOffset:
9159 if (pDbgInfo->aEntries[idx].NativeOffset.offNative != offNative)
9160 return NULL;
9161 break;
9162 }
9163 break;
9164 }
9165 }
9166 }
9167#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9168 }
9169 else
9170 {
9171 PCIEMNATIVEPERCHUNKCTX const pChunkCtx = pSymCtx->pCtx;
9172 if (pChunkCtx)
9173 for (uint32_t i = 1; i < RT_ELEMENTS(pChunkCtx->apExitLabels); i++)
9174 if ((PIEMNATIVEINSTR)uAddress == pChunkCtx->apExitLabels[i])
9175 return iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true /*fCommonCode*/);
9176 }
9177 RT_NOREF(pszBuf, cbBuf);
9178 return NULL;
9179}
9180
9181#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9182
9183/**
9184 * @callback_method_impl{FNDISGETSYMBOL}
9185 */
9186static DECLCALLBACK(int) iemNativeDisasmGetSymbolCb(PCDISSTATE pDis, uint32_t u32Sel, RTUINTPTR uAddress,
9187 char *pszBuf, size_t cchBuf, RTINTPTR *poff, void *pvUser)
9188{
9189 const char * const pszSym = iemNativeDisasmGetSymbol((PIEMNATIVDISASMSYMCTX)pvUser, uAddress, pszBuf, cchBuf);
9190 if (pszSym)
9191 {
9192 *poff = 0;
9193 if (pszSym != pszBuf)
9194 return RTStrCopy(pszBuf, cchBuf, pszSym);
9195 return VINF_SUCCESS;
9196 }
9197 RT_NOREF(pDis, u32Sel);
9198 return VERR_SYMBOL_NOT_FOUND;
9199}
9200
9201#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9202
9203/**
9204 * Annotates an instruction decoded by the capstone disassembler.
9205 */
9206static const char *
9207iemNativeDisasmAnnotateCapstone(PIEMNATIVDISASMSYMCTX pSymCtx, cs_insn const *pInstr, char *pszBuf, size_t cchBuf)
9208{
9209# if defined(RT_ARCH_ARM64)
9210 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9211 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9212 {
9213 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9214 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9215 char const *psz = strchr(pInstr->op_str, '[');
9216 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9217 {
9218 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9219 int32_t off = -1;
9220 psz += 4;
9221 if (*psz == ']')
9222 off = 0;
9223 else if (*psz == ',')
9224 {
9225 psz = RTStrStripL(psz + 1);
9226 if (*psz == '#')
9227 off = RTStrToInt32(&psz[1]);
9228 /** @todo deal with index registers and LSL as well... */
9229 }
9230 if (off >= 0)
9231 return iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9232 }
9233 }
9234 else if (pInstr->id == ARM64_INS_B || pInstr->id == ARM64_INS_BL)
9235 {
9236 const char *pszAddr = strchr(pInstr->op_str, '#');
9237 if (pszAddr)
9238 {
9239 uint64_t uAddr = RTStrToUInt64(pszAddr + 1);
9240 if (uAddr != 0)
9241 return iemNativeDisasmGetSymbol(pSymCtx, uAddr, pszBuf, cchBuf);
9242 }
9243 }
9244# endif
9245 RT_NOREF(pSymCtx, pInstr, pszBuf, cchBuf);
9246 return NULL;
9247}
9248#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9249
9250
9251DECLHIDDEN(void) iemNativeDisassembleTb(PVMCPU pVCpu, PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9252{
9253 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9254#if defined(RT_ARCH_AMD64)
9255 static const char * const a_apszMarkers[] =
9256 {
9257 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9258 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9259 };
9260#endif
9261
9262 char szDisBuf[512];
9263 DISSTATE Dis;
9264 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9265 uint32_t const cNative = pTb->Native.cInstructions;
9266 uint32_t offNative = 0;
9267#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9268 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9269#endif
9270 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9271 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9272 : DISCPUMODE_64BIT;
9273#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9274 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb), pDbgInfo };
9275#else
9276 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb) };
9277#endif
9278#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9279 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9280#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9281 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9282#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9283# error "Port me"
9284#else
9285 csh hDisasm = ~(size_t)0;
9286# if defined(RT_ARCH_AMD64)
9287 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9288# elif defined(RT_ARCH_ARM64)
9289 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9290# else
9291# error "Port me"
9292# endif
9293 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9294
9295 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9296 //Assert(rcCs == CS_ERR_OK);
9297#endif
9298
9299 /*
9300 * Print TB info.
9301 */
9302 pHlp->pfnPrintf(pHlp,
9303 "pTb=%p: GCPhysPc=%RGp (%%%RGv) cInstructions=%u LB %#x cRanges=%u\n"
9304 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9305 pTb, pTb->GCPhysPc,
9306#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9307 pTb->pDbgInfo ? pTb->pDbgInfo->FlatPc : RTGCPTR_MAX,
9308#else
9309 pTb->FlatPc,
9310#endif
9311 pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9312 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9313#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9314 if (pDbgInfo && pDbgInfo->cEntries > 1)
9315 {
9316 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9317
9318 /*
9319 * This disassembly is driven by the debug info which follows the native
9320 * code and indicates when it starts with the next guest instructions,
9321 * where labels are and such things.
9322 */
9323 uint32_t idxThreadedCall = 0;
9324 uint32_t idxGuestInstr = 0;
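         /* Seed the execution mode from the TB flags; any change recorded in the debug info is reported below. */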
9325 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9326 uint8_t idxRange = UINT8_MAX;
9327 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9328 uint32_t offRange = 0;
9329 uint32_t offOpcodes = 0;
9330 uint32_t const cbOpcodes = pTb->cbOpcodes;
9331 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9332 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9333 uint32_t iDbgEntry = 1;
9334 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9335
9336 while (offNative < cNative)
9337 {
9338 /* If we're at or have passed the point where the next chunk of debug
9339 info starts, process it. */
9340 if (offDbgNativeNext <= offNative)
9341 {
9342 offDbgNativeNext = UINT32_MAX;
9343 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9344 {
9345 switch ((IEMTBDBGENTRYTYPE)pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9346 {
9347 case kIemTbDbgEntryType_GuestInstruction:
9348 {
9349 /* Did the exec flag change? */
9350 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9351 {
9352 pHlp->pfnPrintf(pHlp,
9353 " fExec change %#08x -> %#08x %s\n",
9354 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9355 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9356 szDisBuf, sizeof(szDisBuf)));
9357 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9358 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9359 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9360 : DISCPUMODE_64BIT;
9361 }
9362
9363 /* New opcode range? We need to fend off a spurious debug info entry here for cases
9364 where the compilation was aborted before the opcode was recorded and the actual
9365 instruction was translated to a threaded call. This may happen when we run out
9366 of ranges, or when some complicated interrupts/FFs are found to be pending or
9367 similar. So, we just deal with it here rather than in the compiler code as it
9368 is a lot simpler to do here. */
9369 if ( idxRange == UINT8_MAX
9370 || idxRange >= cRanges
9371 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9372 {
9373 idxRange += 1;
9374 if (idxRange < cRanges)
9375 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9376 else
9377 continue;
9378 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9379 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9380 + (pTb->aRanges[idxRange].idxPhysPage == 0
9381 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9382 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9383 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9384 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9385 pTb->aRanges[idxRange].idxPhysPage);
9386 GCPhysPc += offRange;
9387 }
9388
9389 /* Disassemble the instruction. */
9390 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9391 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9392 uint32_t cbInstr = 1;
9393 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9394 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9395 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9396 if (RT_SUCCESS(rc))
9397 {
9398 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9399 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9400 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9401 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9402
9403 static unsigned const s_offMarker = 55;
9404 static char const s_szMarker[] = " ; <--- guest";
9405 if (cch < s_offMarker)
9406 {
9407 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9408 cch = s_offMarker;
9409 }
9410 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9411 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9412
9413 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %s #%u\n", GCPhysPc, szDisBuf, idxGuestInstr);
9414 }
9415 else
9416 {
9417 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9418 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9419 cbInstr = 1;
9420 }
9421 idxGuestInstr++;
9422 GCPhysPc += cbInstr;
9423 offOpcodes += cbInstr;
9424 offRange += cbInstr;
9425 continue;
9426 }
9427
9428 case kIemTbDbgEntryType_ThreadedCall:
9429 pHlp->pfnPrintf(pHlp,
9430 " Call #%u to %s (%u args) - %s\n",
9431 idxThreadedCall,
9432 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9433 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9434 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9435 idxThreadedCall++;
9436 continue;
9437
9438 case kIemTbDbgEntryType_GuestRegShadowing:
9439 {
9440 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9441 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9442 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9443 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9444 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9445 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9446 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9447 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9448 else
9449 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9450 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9451 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9452 continue;
9453 }
9454
9455# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9456 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9457 {
9458 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9459 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9460 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9461 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9462 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9463 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9464 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9465 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9466 else
9467 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9468 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9469 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9470 continue;
9471 }
9472# endif
9473
9474 case kIemTbDbgEntryType_Label:
9475 {
9476 const char *pszName = iemNativeGetLabelName((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel);
9477 if (pDbgInfo->aEntries[iDbgEntry].Label.enmLabel >= kIemNativeLabelType_FirstWithMultipleInstances)
9478 {
9479 const char *pszComment = pDbgInfo->aEntries[iDbgEntry].Label.enmLabel == kIemNativeLabelType_Else
9480 ? " ; regs state restored pre-if-block" : "";
9481 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9482 }
9483 else
9484 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9485 continue;
9486 }
9487
9488 case kIemTbDbgEntryType_NativeOffset:
9489 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9490 Assert(offDbgNativeNext >= offNative);
9491 break;
9492
9493# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9494 case kIemTbDbgEntryType_DelayedPcUpdate:
9495 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9496 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9497 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9498 continue;
9499# endif
9500
9501# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9502 case kIemTbDbgEntryType_GuestRegDirty:
9503 {
9504 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9505 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9506 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9507 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9508 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9509 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9510 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9511 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9512 pszGstReg, pszHstReg);
9513 continue;
9514 }
9515
9516 case kIemTbDbgEntryType_GuestRegWriteback:
9517 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX64)\n",
9518 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9519 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9520 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9521 continue;
9522# endif
9523
9524# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9525 case kIemTbDbgEntryType_PostponedEFlagsCalc:
9526 {
9527 const char *pszOp = "!unknown!";
9528 switch ((IEMNATIVE_POSTPONED_EFL_OP_T)pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.enmOp)
9529 {
9530 case kIemNativePostponedEflOp_Logical: pszOp = "logical"; break;
9531 case kIemNativePostponedEflOp_Invalid: break;
9532 case kIemNativePostponedEflOp_End: break;
9533 }
9534 pHlp->pfnPrintf(pHlp, " Postponed EFLAGS calc #%u: %s %u bits\n",
9535 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.idxEmit, pszOp,
9536 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.cOpBits);
9537 continue;
9538 }
9539# endif
9540 default:
9541 AssertFailed();
9542 continue;
9543 }
9544 /* Break out of the loop at kIemTbDbgEntryType_NativeOffset. */
9545 iDbgEntry++;
9546 break;
9547 }
9548 }
9549
9550 /*
9551 * Disassemble the next native instruction.
9552 */
9553 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9554# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9555 uint32_t cbInstr = sizeof(paNative[0]);
9556 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9557 if (RT_SUCCESS(rc))
9558 {
9559# if defined(RT_ARCH_AMD64)
9560 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9561 {
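             /* The marker is a 7 byte NOP carrying a 32-bit info dword in its last four bytes (offset 3). */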
9562 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9563 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9564 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9565 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9566 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9567 uInfo & 0x8000 ? "recompiled" : "todo");
9568 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9569 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9570 else
9571 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9572 }
9573 else
9574# endif
9575 {
9576 const char *pszAnnotation = NULL;
9577# ifdef RT_ARCH_AMD64
9578 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9579 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9580 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9581 iemNativeDisasmGetSymbolCb, &SymCtx);
9582 PCDISOPPARAM pMemOp;
9583 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[0].fUse))
9584 pMemOp = &Dis.aParams[0];
9585 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[1].fUse))
9586 pMemOp = &Dis.aParams[1];
9587 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[2].fUse))
9588 pMemOp = &Dis.aParams[2];
9589 else
9590 pMemOp = NULL;
9591 if ( pMemOp
9592 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9593 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9594 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9595 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9596
9597# elif defined(RT_ARCH_ARM64)
9598 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9599 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9600 iemNativeDisasmGetSymbolCb, &SymCtx);
9601# else
9602# error "Port me"
9603# endif
9604 if (pszAnnotation)
9605 {
9606 static unsigned const s_offAnnotation = 55;
9607 size_t const cchAnnotation = strlen(pszAnnotation);
9608 size_t cchDis = strlen(szDisBuf);
9609 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9610 {
9611 if (cchDis < s_offAnnotation)
9612 {
9613 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9614 cchDis = s_offAnnotation;
9615 }
9616 szDisBuf[cchDis++] = ' ';
9617 szDisBuf[cchDis++] = ';';
9618 szDisBuf[cchDis++] = ' ';
9619 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9620 }
9621 }
9622 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9623 }
9624 }
9625 else
9626 {
9627# if defined(RT_ARCH_AMD64)
9628 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9629 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9630# elif defined(RT_ARCH_ARM64)
9631 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9632# else
9633# error "Port me"
9634# endif
9635 cbInstr = sizeof(paNative[0]);
9636 }
9637 offNative += cbInstr / sizeof(paNative[0]);
9638
9639# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9640 cs_insn *pInstr;
9641 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9642 (uintptr_t)pNativeCur, 1, &pInstr);
9643 if (cInstrs > 0)
9644 {
9645 Assert(cInstrs == 1);
9646 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9647 size_t const cchOp = strlen(pInstr->op_str);
9648# if defined(RT_ARCH_AMD64)
9649 if (pszAnnotation)
9650 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9651 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9652 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9653 else
9654 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9655 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9656
9657# else
9658 if (pszAnnotation)
9659 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9660 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9661 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9662 else
9663 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9664 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9665# endif
9666 offNative += pInstr->size / sizeof(*pNativeCur);
9667 cs_free(pInstr, cInstrs);
9668 }
9669 else
9670 {
9671# if defined(RT_ARCH_AMD64)
9672 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9673                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9674# else
9675 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9676# endif
9677 offNative++;
9678 }
9679# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9680 }
9681 }
9682 else
9683#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9684 {
9685 /*
9686 * No debug info, just disassemble the x86 code and then the native code.
9687 *
9688 * First the guest code:
9689 */
9690 for (unsigned i = 0; i < pTb->cRanges; i++)
9691 {
9692 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9693 + (pTb->aRanges[i].idxPhysPage == 0
9694 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9695 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9696 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9697 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9698 unsigned off = pTb->aRanges[i].offOpcodes;
9699 /** @todo this ain't working when crossing pages! */
9700 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9701 while (off < cbOpcodes)
9702 {
9703 uint32_t cbInstr = 1;
9704 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9705 &pTb->pabOpcodes[off], cbOpcodes - off,
9706 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9707 if (RT_SUCCESS(rc))
9708 {
9709 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9710 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9711 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9712 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9713 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9714 GCPhysPc += cbInstr;
9715 off += cbInstr;
9716 }
9717 else
9718 {
9719 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - disassembly failure %Rrc\n",
9720 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9721 break;
9722 }
9723 }
9724 }
9725
9726 /*
9727 * Then the native code:
9728 */
9729 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9730 while (offNative < cNative)
9731 {
9732 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9733#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9734 uint32_t cbInstr = sizeof(paNative[0]);
9735 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9736 if (RT_SUCCESS(rc))
9737 {
9738# if defined(RT_ARCH_AMD64)
9739 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9740 {
9741 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9742 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9743 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9744 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9745 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9746 uInfo & 0x8000 ? "recompiled" : "todo");
9747 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9748 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9749 else
9750 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9751 }
9752 else
9753# endif
9754 {
9755# ifdef RT_ARCH_AMD64
9756 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9757 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9758 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9759 iemNativeDisasmGetSymbolCb, &SymCtx);
9760# elif defined(RT_ARCH_ARM64)
9761 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9762 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9763 iemNativeDisasmGetSymbolCb, &SymCtx);
9764# else
9765# error "Port me"
9766# endif
9767 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9768 }
9769 }
9770 else
9771 {
9772# if defined(RT_ARCH_AMD64)
9773 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9774 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9775# else
9776 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9777# endif
9778 cbInstr = sizeof(paNative[0]);
9779 }
9780 offNative += cbInstr / sizeof(paNative[0]);
9781
9782#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9783 cs_insn *pInstr;
9784 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9785 (uintptr_t)pNativeCur, 1, &pInstr);
9786 if (cInstrs > 0)
9787 {
9788 Assert(cInstrs == 1);
9789 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9790 size_t const cchOp = strlen(pInstr->op_str);
9791# if defined(RT_ARCH_AMD64)
9792 if (pszAnnotation)
9793 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9794 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9795 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9796 else
9797 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9798 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9799
9800# else
9801 if (pszAnnotation)
9802 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9803 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9804 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9805 else
9806 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9807 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9808# endif
9809 offNative += pInstr->size / sizeof(*pNativeCur);
9810 cs_free(pInstr, cInstrs);
9811 }
9812 else
9813 {
9814# if defined(RT_ARCH_AMD64)
9815 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9816                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9817# else
9818 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9819# endif
9820 offNative++;
9821 }
9822#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9823 }
9824 }
9825
9826#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9827 /* Cleanup. */
9828 cs_close(&hDisasm);
9829#endif
9830}
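
/*
 * Marker packing sketch (illustrative only, hypothetical local names): in strict
 * builds a 7 byte NOP is emitted ahead of each threaded call with a 32-bit payload
 * embedded in its immediate.  The low 15 bits carry the call index, bit 15 flags a
 * natively recompiled call, and the high word holds the threaded function number;
 * the decoder above simply splits it again with RT_HIWORD() and masks.
 */
#if 0
uint32_t const uInfo    = RT_MAKE_U32(idxCall | (fRecompiled ? 0x8000 : 0), enmFunction);
uint32_t const idxCall2 = uInfo & 0x7fff;           /* call index              */
bool     const fNative  = RT_BOOL(uInfo & 0x8000);  /* recompiled vs. todo     */
uint32_t const enmFunc  = RT_HIWORD(uInfo);         /* threaded function no.   */
#endif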
9831
9832
9833/** Emit alignment padding between labels / functions. */
9834DECL_INLINE_THROW(uint32_t)
9835iemNativeRecompileEmitAlignmentPadding(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fAlignMask)
9836{
9837 if (off & fAlignMask)
9838 {
9839 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, fAlignMask + 1);
9840 while (off & fAlignMask)
9841#if defined(RT_ARCH_AMD64)
9842 pCodeBuf[off++] = 0xcc;
9843#elif defined(RT_ARCH_ARM64)
9844 pCodeBuf[off++] = Armv8A64MkInstrBrk(0xcccc);
9845#else
9846# error "port me"
9847#endif
9848 }
9849 return off;
9850}
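
/*
 * Alignment sketch (illustrative only, hypothetical function name): fAlignMask is a
 * power-of-two mask in IEMNATIVEINSTR units.  On AMD64 a unit is one byte, so a mask
 * of 15 pads to the next 16 byte boundary; on ARM64 a unit is a 32-bit instruction,
 * so a mask of 31 / 4 = 7 pads to the next 32 byte boundary (8 instructions).  The
 * real emitter above fills the gap with int3 / brk; this only shows the arithmetic.
 */
#if 0
static uint32_t iemNativeExamplePadToAlignment(uint32_t off, uint32_t fAlignMask)
{
    while (off & fAlignMask)    /* one filler unit per iteration */
        off++;
    return off;                 /* e.g. off=13, fAlignMask=15 -> 16 */
}
#endif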
9851
9852
9853/**
9854 * Called when a new chunk is allocated to emit common per-chunk code.
9855 *
9856 * Allocates a per-chunk context directly from the chunk itself and places the
9857 * common code there.
9858 *
9859 * @returns VBox status code.
9860 * @param pVCpu The cross context virtual CPU structure of the calling
9861 * thread.
9862 * @param idxChunk The index of the chunk being added and requiring a
9863 * common code context.
9864 * @param ppCtx Where to return the pointer to the chunk context start.
9865 */
9866DECLHIDDEN(int) iemNativeRecompileAttachExecMemChunkCtx(PVMCPU pVCpu, uint32_t idxChunk, PCIEMNATIVEPERCHUNKCTX *ppCtx)
9867{
9868 *ppCtx = NULL;
9869
9870 /*
9871 * Allocate a new recompiler state (since we're likely to be called while
9872 * the default one is fully loaded already with a recompiled TB).
9873 *
9874 * This is a bit of overkill, but this isn't a frequently used code path.
9875 */
9876 PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, NULL);
9877 AssertReturn(pReNative, VERR_NO_MEMORY);
9878
9879#if defined(RT_ARCH_AMD64)
9880 uint32_t const fAlignMask = 15;
9881#elif defined(RT_ARCH_ARM64)
9882 uint32_t const fAlignMask = 31 / 4;
9883#else
9884# error "port me"
9885#endif
9886 uint32_t aoffLabels[kIemNativeLabelType_LastTbExit + 1] = {0};
9887 int rc = VINF_SUCCESS;
9888 uint32_t off = 0;
9889
9890 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9891 {
9892 /*
9893 * Emit the epilog code.
9894 */
9895 aoffLabels[kIemNativeLabelType_ReturnSuccess] = off;
9896 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
9897 uint32_t const offReturnWithStatus = off;
9898 off = iemNativeEmitCoreEpilog(pReNative, off);
9899
9900 /*
9901         * Generate special jump labels. All of these get a copy of the epilog code.
9902 */
9903 static struct
9904 {
9905 IEMNATIVELABELTYPE enmExitReason;
9906 uint32_t (*pfnEmitCore)(PIEMRECOMPILERSTATE pReNative, uint32_t off);
9907 } const s_aSpecialWithEpilogs[] =
9908 {
9909 { kIemNativeLabelType_NonZeroRetOrPassUp, iemNativeEmitCoreRcFiddling },
9910 { kIemNativeLabelType_ReturnBreak, iemNativeEmitCoreReturnBreak },
9911 { kIemNativeLabelType_ReturnBreakFF, iemNativeEmitCoreReturnBreakFF },
9912 { kIemNativeLabelType_ReturnWithFlags, iemNativeEmitCoreReturnWithFlags },
9913 };
9914 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSpecialWithEpilogs); i++)
9915 {
9916 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9917 Assert(aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] == 0);
9918 aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] = off;
9919 off = s_aSpecialWithEpilogs[i].pfnEmitCore(pReNative, off);
9920 off = iemNativeEmitCoreEpilog(pReNative, off);
9921 }
9922
9923 /*
9924 * Do what iemNativeEmitReturnBreakViaLookup does.
9925 */
9926 static struct
9927 {
9928 IEMNATIVELABELTYPE enmExitReason;
9929 uintptr_t pfnHelper;
9930 } const s_aViaLookup[] =
9931 {
9932 { kIemNativeLabelType_ReturnBreakViaLookup,
9933 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/> },
9934 { kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
9935 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/> },
9936 { kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
9937 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/> },
9938 { kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
9939 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/> },
9940 };
9941 uint32_t const offReturnBreak = aoffLabels[kIemNativeLabelType_ReturnBreak]; Assert(offReturnBreak != 0);
9942 for (uint32_t i = 0; i < RT_ELEMENTS(s_aViaLookup); i++)
9943 {
9944 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9945 Assert(aoffLabels[s_aViaLookup[i].enmExitReason] == 0);
9946 aoffLabels[s_aViaLookup[i].enmExitReason] = off;
9947 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, s_aViaLookup[i].pfnHelper);
9948 }
9949
9950 /*
9951         * Generate simple TB tail labels that just call a helper with a pVCpu
9952         * arg and either return or longjmp/throw a non-zero status.
9953 */
9954 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9955 static struct
9956 {
9957 IEMNATIVELABELTYPE enmExitReason;
9958 bool fWithEpilog;
9959 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9960 } const s_aSimpleTailLabels[] =
9961 {
9962 { kIemNativeLabelType_RaiseDe, false, iemNativeHlpExecRaiseDe },
9963 { kIemNativeLabelType_RaiseUd, false, iemNativeHlpExecRaiseUd },
9964 { kIemNativeLabelType_RaiseSseRelated, false, iemNativeHlpExecRaiseSseRelated },
9965 { kIemNativeLabelType_RaiseAvxRelated, false, iemNativeHlpExecRaiseAvxRelated },
9966 { kIemNativeLabelType_RaiseSseAvxFpRelated, false, iemNativeHlpExecRaiseSseAvxFpRelated },
9967 { kIemNativeLabelType_RaiseNm, false, iemNativeHlpExecRaiseNm },
9968 { kIemNativeLabelType_RaiseGp0, false, iemNativeHlpExecRaiseGp0 },
9969 { kIemNativeLabelType_RaiseMf, false, iemNativeHlpExecRaiseMf },
9970 { kIemNativeLabelType_RaiseXf, false, iemNativeHlpExecRaiseXf },
9971 { kIemNativeLabelType_ObsoleteTb, true, iemNativeHlpObsoleteTb },
9972 { kIemNativeLabelType_NeedCsLimChecking, true, iemNativeHlpNeedCsLimChecking },
9973 { kIemNativeLabelType_CheckBranchMiss, true, iemNativeHlpCheckBranchMiss },
9974 };
9975 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSimpleTailLabels); i++)
9976 {
9977 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9978 Assert(!aoffLabels[s_aSimpleTailLabels[i].enmExitReason]);
9979 aoffLabels[s_aSimpleTailLabels[i].enmExitReason] = off;
9980
9981 /* int pfnCallback(PVMCPUCC pVCpu) */
9982 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9983 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)s_aSimpleTailLabels[i].pfnCallback);
9984
9985 /* If the callback is supposed to return with a status code we inline the epilog
9986 sequence for better speed. Otherwise, if the callback shouldn't return because
9987 it throws/longjmps, we just jump to the return sequence to be on the safe side. */
9988 if (s_aSimpleTailLabels[i].fWithEpilog)
9989 off = iemNativeEmitCoreEpilog(pReNative, off);
9990 else
9991 {
9992#ifdef VBOX_STRICT
9993 off = iemNativeEmitBrk(pReNative, off, 0x2201);
9994#endif
9995 off = iemNativeEmitJmpToFixed(pReNative, off, offReturnWithStatus);
9996 }
9997 }
9998
9999
10000#ifdef VBOX_STRICT
10001        /* Make sure we've generated code for all labels. */
10002 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(aoffLabels); i++)
10003 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
10004#endif
10005 }
10006 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10007 {
10008 Log(("iemNativeRecompileAttachExecMemChunkCtx: Caught %Rrc while recompiling!\n", rc));
10009 iemNativeTerm(pReNative);
10010 return rc;
10011 }
10012 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10013
10014 /*
10015 * Allocate memory for the context (first) and the common code (last).
10016 */
10017 PIEMNATIVEPERCHUNKCTX pCtx;
10018 uint32_t const cbCtx = RT_ALIGN_32(sizeof(*pCtx), 64);
10019 uint32_t const cbCode = off * sizeof(IEMNATIVEINSTR);
10020 PIEMNATIVEINSTR paFinalCommonCodeRx = NULL;
10021 pCtx = (PIEMNATIVEPERCHUNKCTX)iemExecMemAllocatorAllocFromChunk(pVCpu, idxChunk, cbCtx + cbCode, &paFinalCommonCodeRx);
10022 AssertLogRelMsgReturnStmt(pCtx, ("cbCtx=%#x cbCode=%#x idxChunk=%#x\n", cbCtx, cbCode, idxChunk),
10023 iemNativeTerm(pReNative), VERR_OUT_OF_RESOURCES);
10024
10025 /*
10026 * Copy over the generated code.
10027 * There should be no fixups or labels defined here.
10028 */
10029 paFinalCommonCodeRx = (PIEMNATIVEINSTR)((uintptr_t)paFinalCommonCodeRx + cbCtx);
10030 memcpy((PIEMNATIVEINSTR)((uintptr_t)pCtx + cbCtx), pReNative->pInstrBuf, cbCode);
10031
10032 Assert(pReNative->cFixups == 0);
10033 Assert(pReNative->cLabels == 0);
10034
10035 /*
10036 * Initialize the context.
10037 */
10038 AssertCompile(kIemNativeLabelType_Invalid == 0);
10039 AssertCompile(RT_ELEMENTS(pCtx->apExitLabels) == RT_ELEMENTS(aoffLabels));
10040 pCtx->apExitLabels[kIemNativeLabelType_Invalid] = 0;
10041 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(pCtx->apExitLabels); i++)
10042 {
10043 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
10044 pCtx->apExitLabels[i] = &paFinalCommonCodeRx[aoffLabels[i]];
10045 Log10((" apExitLabels[%u]=%p %s\n", i, pCtx->apExitLabels[i], iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true)));
10046 }
10047
10048 iemExecMemAllocatorReadyForUse(pVCpu, pCtx, cbCtx + cbCode);
10049
10050 iemNativeTerm(pReNative);
10051 *ppCtx = pCtx;
10052 return VINF_SUCCESS;
10053}
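
/*
 * Memory layout sketch for the per-chunk allocation made above (illustrative only):
 * a single block holds the context structure first, padded up to a 64 byte multiple,
 * with the common exit/epilog code placed directly after it.  The exit label table
 * then simply points into the executable (RX) alias of that code:
 *
 *      +--------------------------+  <- pCtx (RW) / RX alias returned by the allocator
 *      | IEMNATIVEPERCHUNKCTX     |     cbCtx  = RT_ALIGN_32(sizeof(*pCtx), 64)
 *      +--------------------------+  <- paFinalCommonCodeRx (RX base + cbCtx)
 *      | common tail / exit code  |     cbCode = off * sizeof(IEMNATIVEINSTR)
 *      +--------------------------+
 *
 *      apExitLabels[enmLabel] = &paFinalCommonCodeRx[aoffLabels[enmLabel]]
 */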
10054
10055
10056/**
10057 * Recompiles the given threaded TB into a native one.
10058 *
10059 * In case of failure the translation block will be returned as-is.
10060 *
10061 * @returns pTb.
10062 * @param pVCpu The cross context virtual CPU structure of the calling
10063 * thread.
10064 * @param   pTb         The threaded translation block to recompile to native.
10065 */
10066DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
10067{
10068#if 0 /* For profiling the native recompiler code. */
10069l_profile_again:
10070#endif
10071 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
10072
10073 /*
10074     * The first time thru, we allocate the recompiler state and save it;
10075 * all the other times we'll just reuse the saved one after a quick reset.
10076 */
10077 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
10078 if (RT_LIKELY(pReNative))
10079 iemNativeReInit(pReNative, pTb);
10080 else
10081 {
10082 pReNative = iemNativeInit(pVCpu, pTb);
10083 AssertReturn(pReNative, pTb);
10084 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative; /* save it */
10085 }
10086
10087#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10088 /*
10089 * First do liveness analysis. This is done backwards.
10090 */
10091 {
10092 uint32_t idxCall = pTb->Thrd.cCalls;
10093 if (idxCall <= pReNative->cLivenessEntriesAlloc)
10094 { /* likely */ }
10095 else
10096 {
10097 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
10098 while (idxCall > cAlloc)
10099 cAlloc *= 2;
10100 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
10101 AssertReturn(pvNew, pTb);
10102 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
10103 pReNative->cLivenessEntriesAlloc = cAlloc;
10104 }
10105 AssertReturn(idxCall > 0, pTb);
10106 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
10107
10108 /* The initial (final) entry. */
10109 idxCall--;
10110 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
10111
10112 /* Loop backwards thru the calls and fill in the other entries. */
10113 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
10114 while (idxCall > 0)
10115 {
10116 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
10117 Assert(pfnLiveness);
10118 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
10119 pCallEntry--;
10120 idxCall--;
10121 }
10122 }
10123#endif
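
    /*
     * Backwards liveness sketch (illustrative only, generic names): information about
     * which guest registers are still needed flows from the last call towards the
     * first.  Each per-function liveness callback reads the state computed for the
     * call that follows it and produces the state seen by the call before it, so a
     * value that is overwritten before it is ever read again can be treated as dead.
     */
#if 0
    /* stateBefore = fnTransfer(call, stateAfter), applied from the end towards the start: */
    for (uint32_t idx = cCalls - 1; idx > 0; idx--)
        aState[idx - 1] = fnTransfer(&paCalls[idx], aState[idx]);
#endif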
10124
10125 /*
10126 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10127 * for aborting if an error happens.
10128 */
10129 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10130#ifdef LOG_ENABLED
10131 uint32_t const cCallsOrg = cCallsLeft;
10132#endif
10133 uint32_t off = 0;
10134 int rc = VINF_SUCCESS;
10135 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10136 {
10137 /*
10138 * Convert the calls to native code.
10139 */
10140#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10141 int32_t iGstInstr = -1;
10142#endif
10143#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10144 uint32_t cThreadedCalls = 0;
10145 uint32_t cRecompiledCalls = 0;
10146#endif
10147#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10148 uint32_t idxCurCall = 0;
10149#endif
10150 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10151 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10152 while (cCallsLeft-- > 0)
10153 {
10154 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10155#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10156 pReNative->idxCurCall = idxCurCall;
10157#endif
10158
10159#ifdef IEM_WITH_INTRA_TB_JUMPS
10160 /*
10161 * Define label for jump targets (currently only the first entry).
10162 */
10163 if (!(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET))
10164 { /* likely */ }
10165 else
10166 {
10167 iemNativeLabelCreate(pReNative, kIemNativeLabelType_LoopJumpTarget, off);
10168 Assert(idxCurCall == 0); /** @todo when jumping elsewhere, we have to save the register state. */
10169 }
10170#endif
10171
10172 /*
10173 * Debug info, assembly markup and statistics.
10174 */
10175#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10176 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10177 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10178#endif
10179#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10180 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10181 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10182 {
10183 if (iGstInstr < (int32_t)pTb->cInstructions)
10184 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10185 else
10186 Assert(iGstInstr == pTb->cInstructions);
10187 iGstInstr = pCallEntry->idxInstr;
10188 }
10189 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10190#endif
10191#if defined(VBOX_STRICT)
10192 off = iemNativeEmitMarker(pReNative, off,
10193 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
10194#endif
10195#if defined(VBOX_STRICT)
10196 iemNativeRegAssertSanity(pReNative);
10197#endif
10198#ifdef VBOX_WITH_STATISTICS
10199 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10200#endif
10201
10202#if 0
10203 if ( pTb->GCPhysPc == 0x00000000000c1240
10204 && idxCurCall == 67)
10205 off = iemNativeEmitBrk(pReNative, off, 0xf000);
10206#endif
10207
10208 /*
10209 * Actual work.
10210 */
10211 Log2(("%u[%u]: %s%s (off=%#x)\n", idxCurCall, pCallEntry->idxInstr,
10212 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)", off));
10213 if (pfnRecom) /** @todo stats on this. */
10214 {
10215 off = pfnRecom(pReNative, off, pCallEntry);
10216 STAM_REL_STATS({cRecompiledCalls++;});
10217 }
10218 else
10219 {
10220 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10221 STAM_REL_STATS({cThreadedCalls++;});
10222 }
10223 Assert(off <= pReNative->cInstrBufAlloc);
10224 Assert(pReNative->cCondDepth == 0);
10225
10226#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10227 if (LogIs2Enabled())
10228 {
10229 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10230# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10231 static const char s_achState[] = "CUXI";
10232# else
10233 /* 0123 4567 89ab cdef */
10234 /* CCCC CCCC */
10235 /* WWWW WWWW */
10236 /* RR RR RR RR */
10237 /* P P P P P P P P */
10238 static const char s_achState[] = "UxRr" "WwMm" "CcQq" "KkNn";
10239# endif
10240
10241 char szGpr[17];
10242 for (unsigned i = 0; i < 16; i++)
10243 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10244 szGpr[16] = '\0';
10245
10246 char szSegBase[X86_SREG_COUNT + 1];
10247 char szSegLimit[X86_SREG_COUNT + 1];
10248 char szSegAttrib[X86_SREG_COUNT + 1];
10249 char szSegSel[X86_SREG_COUNT + 1];
10250 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10251 {
10252 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10253 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10254 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10255 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10256 }
10257 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10258 = szSegSel[X86_SREG_COUNT] = '\0';
10259
10260 char szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT + 1];
10261 for (unsigned i = 0; i < IEMLIVENESSBIT_IDX_EFL_COUNT; i++)
10262 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10263                     szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT] = '\0';
10264
10265 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10266 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10267 }
10268#endif
10269
10270 /*
10271 * Advance.
10272 */
10273 pCallEntry++;
10274#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10275 idxCurCall++;
10276#endif
10277 }
10278
10279 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10280 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10281 if (!cThreadedCalls)
10282 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10283
10284 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, UINT32_MAX);
10285
10286#ifdef VBOX_WITH_STATISTICS
10287 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
10288#endif
10289
10290 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
10291 off = iemNativeRegFlushPendingWrites(pReNative, off);
10292
10293 /*
10294 * Jump to the common per-chunk epilog code.
10295 */
10296 //off = iemNativeEmitBrk(pReNative, off, 0x1227);
10297 off = iemNativeEmitTbExit<kIemNativeLabelType_ReturnSuccess, true, false>(pReNative, off);
10298
10299 /*
10300 * Generate tail labels with jumps to the common per-chunk code on non-x86 hosts.
10301 */
10302#ifndef RT_ARCH_AMD64
10303 Assert(!(pReNative->bmLabelTypes & ( RT_BIT_64(kIemNativeLabelType_ReturnSuccess)
10304 | RT_BIT_64(kIemNativeLabelType_Invalid) )));
10305 AssertCompile(kIemNativeLabelType_Invalid == 0);
10306 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1U) - 2U);
10307 if (fTailLabels)
10308 {
10309 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, kIemNativeLabelType_LastTbExit + 1);
10310 do
10311 {
10312 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10313 fTailLabels &= ~RT_BIT_64(enmLabel);
10314
10315 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10316 AssertContinue(idxLabel != UINT32_MAX);
10317 iemNativeLabelDefine(pReNative, idxLabel, off);
10318
10319 iemNativeAddTbExitFixup(pReNative, off, enmLabel);
10320# ifdef RT_ARCH_ARM64
10321 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
10322# else
10323# error "port me"
10324# endif
10325 } while (fTailLabels);
10326 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10327 }
10328#else
10329 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1) - 1U))); /* Should not be used! */
10330#endif
10331 }
10332 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10333 {
10334 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10335 return pTb;
10336 }
10337 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10338 Assert(off <= pReNative->cInstrBufAlloc);
10339
10340 /*
10341     * Make sure all labels have been defined.
10342 */
10343 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10344#ifdef VBOX_STRICT
10345 uint32_t const cLabels = pReNative->cLabels;
10346 for (uint32_t i = 0; i < cLabels; i++)
10347 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10348#endif
10349
10350#if 0 /* For profiling the native recompiler code. */
10351 if (pTb->Thrd.cCalls >= 136)
10352 {
10353 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10354 goto l_profile_again;
10355 }
10356#endif
10357
10358 /*
10359 * Allocate executable memory, copy over the code we've generated.
10360 */
10361 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10362 if (pTbAllocator->pDelayedFreeHead)
10363 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10364
10365 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
10366 PCIEMNATIVEPERCHUNKCTX pCtx = NULL;
10367 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10368 &paFinalInstrBufRx, &pCtx);
10369
10370 AssertReturn(paFinalInstrBuf, pTb);
10371 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10372
10373 /*
10374 * Apply fixups.
10375 */
10376 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10377 uint32_t const cFixups = pReNative->cFixups;
10378 for (uint32_t i = 0; i < cFixups; i++)
10379 {
10380 Assert(paFixups[i].off < off);
10381 Assert(paFixups[i].idxLabel < cLabels);
10382 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10383 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10384 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10385 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10386 switch (paFixups[i].enmType)
10387 {
10388#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10389 case kIemNativeFixupType_Rel32:
10390 Assert(paFixups[i].off + 4 <= off);
10391 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10392 continue;
10393
10394#elif defined(RT_ARCH_ARM64)
10395 case kIemNativeFixupType_RelImm26At0:
10396 {
10397 Assert(paFixups[i].off < off);
10398 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10399 Assert(offDisp >= -33554432 && offDisp < 33554432);
10400 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10401 continue;
10402 }
10403
10404 case kIemNativeFixupType_RelImm19At5:
10405 {
10406 Assert(paFixups[i].off < off);
10407 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10408 Assert(offDisp >= -262144 && offDisp < 262144);
10409 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10410 continue;
10411 }
10412
10413 case kIemNativeFixupType_RelImm14At5:
10414 {
10415 Assert(paFixups[i].off < off);
10416 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10417 Assert(offDisp >= -8192 && offDisp < 8192);
10418 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10419 continue;
10420 }
10421
10422#endif
10423 case kIemNativeFixupType_Invalid:
10424 case kIemNativeFixupType_End:
10425 break;
10426 }
10427 AssertFailed();
10428 }
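
    /*
     * Worked example for the kIemNativeFixupType_RelImm26At0 case above (made-up
     * numbers): with the branch at instruction offset 0x40 and the label at 0x10,
     * offDisp = 0x10 - 0x40 = -48 instructions.  The ARM64 B/BL imm26 field is also
     * counted in instructions, so the displacement is stored directly in the low 26
     * bits while the top 6 opcode bits are preserved.
     */
#if 0
    uint32_t uInsn   = UINT32_C(0x14000000);    /* B with a zero displacement        */
    int32_t  offDisp = 0x10 - 0x40;             /* -48 instructions                  */
    uInsn = (uInsn & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
    /* uInsn is now 0x17ffffd0, i.e. a branch 48 instructions (192 bytes) backwards. */
#endif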
10429
10430 /*
10431 * Apply TB exit fixups.
10432 */
10433 PIEMNATIVEEXITFIXUP const paTbExitFixups = pReNative->paTbExitFixups;
10434 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
10435 for (uint32_t i = 0; i < cTbExitFixups; i++)
10436 {
10437 Assert(paTbExitFixups[i].off < off);
10438 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(paTbExitFixups[i].enmExitReason));
10439 RTPTRUNION const Ptr = { &paFinalInstrBuf[paTbExitFixups[i].off] };
10440
10441#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10442 Assert(paTbExitFixups[i].off + 4 <= off);
10443 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off + 4];
10444 Assert(offDisp >= INT32_MIN && offDisp <= INT32_MAX);
10445 *Ptr.pi32 = (int32_t)offDisp;
10446
10447#elif defined(RT_ARCH_ARM64)
10448 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off];
10449 Assert(offDisp >= -33554432 && offDisp < 33554432);
10450 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10451
10452#else
10453# error "Port me!"
10454#endif
10455 }
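
    /*
     * Displacement sketch for the AMD64 TB-exit fixup above (hypothetical addresses):
     * a rel32 in a jmp/call is relative to the end of the 4 byte immediate, so with
     * the immediate at RX address 0x1000 and the per-chunk exit stub at 0x2000 the
     * stored value is 0x2000 - (0x1000 + 4) = 0xffc.  Both addresses must come from
     * the executable (RX) mappings, since that is where the code actually runs.
     */
#if 0
    uintptr_t const uExitStubRx = 0x2000;   /* pCtx->apExitLabels[enmExitReason]        */
    uintptr_t const uFieldRx    = 0x1000;   /* (uintptr_t)&paFinalInstrBufRx[Fixup.off] */
    int32_t   const i32Rel      = (int32_t)(uExitStubRx - (uFieldRx + 4));  /* 0xffc    */
#endif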
10456
10457 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
10458 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10459
10460 /*
10461 * Convert the translation block.
10462 */
10463 RTMemFree(pTb->Thrd.paCalls);
10464 pTb->Native.paInstructions = paFinalInstrBufRx;
10465 pTb->Native.cInstructions = off;
10466 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10467#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10468 pReNative->pDbgInfo->FlatPc = pTb->FlatPc;
10469    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10470 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10471#endif
10472
10473 Assert(pTbAllocator->cThreadedTbs > 0);
10474 pTbAllocator->cThreadedTbs -= 1;
10475 pTbAllocator->cNativeTbs += 1;
10476 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10477
10478#ifdef LOG_ENABLED
10479 /*
10480 * Disassemble to the log if enabled.
10481 */
10482 if (LogIs3Enabled())
10483 {
10484 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10485 iemNativeDisassembleTb(pVCpu, pTb, DBGFR3InfoLogHlp());
10486# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10487 RTLogFlush(NULL);
10488# endif
10489 }
10490#endif
10491 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10492
10493 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10494 return pTb;
10495}
10496