source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 106297

Last change on this file since 106297 was 106197, checked in by vboxsync on 2024-10-01

VMM/IEM: Use iemNativeEmitEFlagsForLogical as emitter for all cases. bugref:10720

1/* $Id: IEMAllN8veRecompiler.cpp 106197 2024-10-01 15:35:13Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : Delayed PC updating.
11 * - Level 5 (Log5) : Postponed and skipped EFLAGS calculations.
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include <VBox/vmm/tm.h>
55#include "IEMInternal.h"
56#include <VBox/vmm/vmcc.h>
57#include <VBox/log.h>
58#include <VBox/err.h>
59#include <VBox/dis.h>
60#include <VBox/param.h>
61#include <iprt/assert.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
71# include "/opt/local/include/capstone/capstone.h"
72#endif
73
74#include "IEMInline.h"
75#include "IEMThreadedFunctions.h"
76#include "IEMN8veRecompiler.h"
77#include "IEMN8veRecompilerEmit.h"
78#include "IEMN8veRecompilerTlbLookup.h"
79#include "IEMNativeFunctions.h"
80#include "target-x86/IEMAllN8veEmit-x86.h"
81
82
83/*
84 * Narrow down configs here to avoid wasting time on unused configs.
85 * Note! Same checks in IEMAllThrdRecompiler.cpp.
86 */
87
88#ifndef IEM_WITH_CODE_TLB
89# error The code TLB must be enabled for the recompiler.
90#endif
91
92#ifndef IEM_WITH_DATA_TLB
93# error The data TLB must be enabled for the recompiler.
94#endif
95
96#ifndef IEM_WITH_SETJMP
97# error The setjmp approach must be enabled for the recompiler.
98#endif
99
100/** @todo eliminate this clang build hack. */
101#if RT_CLANG_PREREQ(4, 0)
102# pragma GCC diagnostic ignored "-Wunused-function"
103#endif
104
105
106/*********************************************************************************************************************************
107* Internal Functions *
108*********************************************************************************************************************************/
109#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
110static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
111#endif
112DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
113DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
114 IEMNATIVEGSTREG enmGstReg, uint32_t off);
115DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
116static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode = false);
117
118
119
120/*********************************************************************************************************************************
121* Native Recompilation *
122*********************************************************************************************************************************/
123
124
125/**
126 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
127 */
128IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
129{
130 pVCpu->iem.s.cInstructions += idxInstr;
131 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
132}
133
134
135/**
136 * Helper for iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb.
137 */
138DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
139{
140 uint64_t fCpu = pVCpu->fLocalForcedActions;
141 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
142 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
143 | VMCPU_FF_TLB_FLUSH
144 | VMCPU_FF_UNHALT );
145 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
146 if (RT_LIKELY( ( !fCpu
147 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
148 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
149 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
150 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
151 return false;
152 return true;
153}
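/* Note: a true return here means an interrupt or some other force flag needs
   servicing, so the direct-linking helpers below must give up on jumping
   straight into the next TB and instead return to the outer execution loop. */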
154
155
156/**
157 * Used by TB code to look up the next TB via the TB lookup table entry and switch directly to it when the physical PC is already known; returns the native entry point of the next TB, or 0 if no suitable TB was found.
158 */
159template <bool const a_fWithIrqCheck>
160IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
161 uint32_t fFlags, RTGCPHYS GCPhysPc))
162{
163 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
164 Assert(idxTbLookup < pTb->cTbLookupEntries);
165 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
166#if 1
167 PIEMTB const pNewTb = *ppNewTb;
168 if (pNewTb)
169 {
170# ifdef VBOX_STRICT
171 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
172 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
173 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
174 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
175 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
176 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
177# endif
178 if (pNewTb->GCPhysPc == GCPhysPc)
179 {
180# ifdef VBOX_STRICT
181 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
182 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
183 fAssertFlags |= IEMTB_F_INHIBIT_SHADOW;
184 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
185 fAssertFlags |= IEMTB_F_INHIBIT_NMI;
186# if 1 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. */
187 Assert(IEM_F_MODE_X86_IS_FLAT(fFlags));
188# else
189 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
190 {
191 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
192 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
193 fAssertFlags |= IEMTB_F_CS_LIM_CHECKS;
194 }
195# endif
196 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
197 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
198 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
199#endif
200
201 /*
202 * Check the flags and type.
203 */
204 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
205 {
206 /*
207 * Check for interrupts and stuff.
208 */
209 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
210 * The main problem is the statistics and to some degree the logging. :/ */
211 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
212 {
213 /* Do polling. */
214 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
215 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
216 {
217 /*
218 * Success. Update statistics and switch to the next TB.
219 */
220 if (a_fWithIrqCheck)
221 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
222 else
223 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
224
225 pNewTb->cUsed += 1;
226 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
227 pVCpu->iem.s.pCurTbR3 = pNewTb;
228 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
229 pVCpu->iem.s.cTbExecNative += 1;
230 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
231 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
232 return (uintptr_t)pNewTb->Native.paInstructions;
233 }
234 }
235 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
236 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
237 }
238 else
239 {
240 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
241 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
242 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
243 }
244 }
245 else
246 {
247 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
248 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
249 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
250 }
251 }
252 else
253 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
254#else
255 NOREF(GCPhysPc);
256#endif
257
258 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
259 return 0;
260}
261
262
263/**
264 * Used by TB code to look up the next TB via the TB lookup table entry and switch directly to it, doing the TLB lookup for the current flat PC itself; returns the native entry point of the next TB, or 0 if no suitable TB was found.
265 */
266template <bool const a_fWithIrqCheck>
267IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
268{
269 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
270 Assert(idxTbLookup < pTb->cTbLookupEntries);
271 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
272#if 1
273 PIEMTB const pNewTb = *ppNewTb;
274 if (pNewTb)
275 {
276 /*
277 * Calculate the flags for the next TB and check if they match.
278 */
279 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
280 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
281 { /* likely */ }
282 else
283 {
284 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
285 fFlags |= IEMTB_F_INHIBIT_SHADOW;
286 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
287 fFlags |= IEMTB_F_INHIBIT_NMI;
288 }
289 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
290 {
291 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
292 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
293 { /* likely */ }
294 else
295 fFlags |= IEMTB_F_CS_LIM_CHECKS;
296 }
297 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
298
299 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
300 {
301 /*
302 * Do the TLB lookup for flat RIP and compare the result with the next TB.
303 *
304 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
305 */
306 /* Calc the effective PC. */
307 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
308 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
309 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
310
311 /* Advance within the current buffer (PAGE) when possible. */
312 RTGCPHYS GCPhysPc;
313 uint64_t off;
314 if ( pVCpu->iem.s.pbInstrBuf
315 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
316 {
317 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
318 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
319 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
320 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
321 else
322 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
323 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
324 }
325 else
326 {
327 pVCpu->iem.s.pbInstrBuf = NULL;
328 pVCpu->iem.s.offCurInstrStart = 0;
329 pVCpu->iem.s.offInstrNextByte = 0;
330 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
331 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
332 }
333
334 if (pNewTb->GCPhysPc == GCPhysPc)
335 {
336 /*
337 * Check for interrupts and stuff.
338 */
339 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
340 * The main problem is the statistics and to some degree the logging. :/ */
341 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
342 {
343 /* Do polling. */
344 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
345 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
346 {
347 /*
348 * Success. Update statistics and switch to the next TB.
349 */
350 if (a_fWithIrqCheck)
351 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
352 else
353 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
354
355 pNewTb->cUsed += 1;
356 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
357 pVCpu->iem.s.pCurTbR3 = pNewTb;
358 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
359 pVCpu->iem.s.cTbExecNative += 1;
360 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
361 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
362 return (uintptr_t)pNewTb->Native.paInstructions;
363 }
364 }
365 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
366 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
367 }
368 else
369 {
370 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
371 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
372 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
373 }
374 }
375 else
376 {
377 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
378 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
379 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
380 }
381 }
382 else
383 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
384#else
385 NOREF(fFlags);
386 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
387#endif
388
389 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
390 return 0;
391}
392
393
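/*
 * Note on the iemNativeHlpExecRaiseXxx helpers below: the iemRaiseXxxJmp
 * workers never return normally (they longjmp out of the generated code via
 * the IEM setjmp mechanism), so the trailing return statements are only there
 * to satisfy compilers other than MSC about the non-void return type.
 */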
394/**
395 * Used by TB code when it wants to raise a \#DE.
396 */
397IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
398{
399 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
400 iemRaiseDivideErrorJmp(pVCpu);
401#ifndef _MSC_VER
402 return VINF_IEM_RAISED_XCPT; /* not reached */
403#endif
404}
405
406
407/**
408 * Used by TB code when it wants to raise a \#UD.
409 */
410IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
411{
412 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
413 iemRaiseUndefinedOpcodeJmp(pVCpu);
414#ifndef _MSC_VER
415 return VINF_IEM_RAISED_XCPT; /* not reached */
416#endif
417}
418
419
420/**
421 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
422 *
423 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
424 */
425IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
426{
427 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
428 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
429 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
430 iemRaiseUndefinedOpcodeJmp(pVCpu);
431 else
432 iemRaiseDeviceNotAvailableJmp(pVCpu);
433#ifndef _MSC_VER
434 return VINF_IEM_RAISED_XCPT; /* not reached */
435#endif
436}
437
438
439/**
440 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
441 *
442 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
443 */
444IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
445{
446 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
447 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
448 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
449 iemRaiseUndefinedOpcodeJmp(pVCpu);
450 else
451 iemRaiseDeviceNotAvailableJmp(pVCpu);
452#ifndef _MSC_VER
453 return VINF_IEM_RAISED_XCPT; /* not reached */
454#endif
455}
456
457
458/**
459 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
460 *
461 * See IEM_MC_CALL_AVX_XXX/IEM_MC_CALL_SSE_XXX.
462 */
463IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
464{
465 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
466 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
467 iemRaiseSimdFpExceptionJmp(pVCpu);
468 else
469 iemRaiseUndefinedOpcodeJmp(pVCpu);
470#ifndef _MSC_VER
471 return VINF_IEM_RAISED_XCPT; /* not reached */
472#endif
473}
474
475
476/**
477 * Used by TB code when it wants to raise a \#NM.
478 */
479IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
480{
481 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
482 iemRaiseDeviceNotAvailableJmp(pVCpu);
483#ifndef _MSC_VER
484 return VINF_IEM_RAISED_XCPT; /* not reached */
485#endif
486}
487
488
489/**
490 * Used by TB code when it wants to raise a \#GP(0).
491 */
492IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
493{
494 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
495 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
496#ifndef _MSC_VER
497 return VINF_IEM_RAISED_XCPT; /* not reached */
498#endif
499}
500
501
502/**
503 * Used by TB code when it wants to raise a \#MF.
504 */
505IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
506{
507 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
508 iemRaiseMathFaultJmp(pVCpu);
509#ifndef _MSC_VER
510 return VINF_IEM_RAISED_XCPT; /* not reached */
511#endif
512}
513
514
515/**
516 * Used by TB code when it wants to raise a \#XF.
517 */
518IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
519{
520 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
521 iemRaiseSimdFpExceptionJmp(pVCpu);
522#ifndef _MSC_VER
523 return VINF_IEM_RAISED_XCPT; /* not reached */
524#endif
525}
526
527
528/**
529 * Used by TB code when detecting opcode changes.
530 * @see iemThreadeFuncWorkerObsoleteTb
531 */
532IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
533{
534 /* We set fSafeToFree to false because we're being called in the context
535 of a TB callback function, which for native TBs means we cannot release
536 the executable memory till we've returned our way back to iemTbExec, as
537 that return path goes via the native code generated for the TB. */
538 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
539 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
540 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
541 return VINF_IEM_REEXEC_BREAK;
542}
543
544
545/**
546 * Used by TB code when we need to switch to a TB with CS.LIM checking.
547 */
548IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
549{
550 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
551 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
552 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
553 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
554 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
555 return VINF_IEM_REEXEC_BREAK;
556}
557
558
559/**
560 * Used by TB code when we missed a PC check after a branch.
561 */
562IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
563{
564 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
565 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
566 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
567 pVCpu->iem.s.pbInstrBuf));
568 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
569 return VINF_IEM_REEXEC_BREAK;
570}
571
572
573
574/*********************************************************************************************************************************
575* Helpers: Segmented memory fetches and stores. *
576*********************************************************************************************************************************/
577
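/*
 * Note: when IEMNATIVE_WITH_TLB_LOOKUP_FETCH / _STORE is defined, the TLB
 * lookup is expected to be emitted inline in the recompiled code, so these
 * helpers only act as the fallback path and therefore call the out-of-line
 * iemMemXxxSafeJmp workers; otherwise they call the regular iemMemXxxJmp
 * variants which do the TLB lookup themselves.
 */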
578/**
579 * Used by TB code to load unsigned 8-bit data w/ segmentation.
580 */
581IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
582{
583#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
584 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
585#else
586 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
587#endif
588}
589
590
591/**
592 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
593 * to 16 bits.
594 */
595IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
596{
597#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
598 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
599#else
600 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
601#endif
602}
603
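/*
 * The cast chains in these sign-extending helpers first sign extend to the
 * target width and then zero extend to 64 bits, so the upper bits of the
 * return register are well defined for the generated code.  A small
 * illustration with a hypothetical value:
 *
 *     uint8_t  const bValue = 0xff;                                      // -1 as int8_t
 *     uint64_t const uRet   = (uint64_t)(uint16_t)(int16_t)(int8_t)bValue;
 *     // uRet == UINT64_C(0x000000000000ffff), i.e. only the low 16 bits are set.
 */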
604
605/**
606 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
607 * to 32 bits.
608 */
609IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
610{
611#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
612 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
613#else
614 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
615#endif
616}
617
618/**
619 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
620 * to 64 bits.
621 */
622IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
623{
624#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
625 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
626#else
627 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
628#endif
629}
630
631
632/**
633 * Used by TB code to load unsigned 16-bit data w/ segmentation.
634 */
635IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
636{
637#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
638 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
639#else
640 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
641#endif
642}
643
644
645/**
646 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
647 * to 32 bits.
648 */
649IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
650{
651#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
652 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
653#else
654 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
655#endif
656}
657
658
659/**
660 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
661 * to 64 bits.
662 */
663IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
664{
665#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
666 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
667#else
668 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
669#endif
670}
671
672
673/**
674 * Used by TB code to load unsigned 32-bit data w/ segmentation.
675 */
676IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
677{
678#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
679 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
680#else
681 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
682#endif
683}
684
685
686/**
687 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
688 * to 64 bits.
689 */
690IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
691{
692#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
693 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
694#else
695 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
696#endif
697}
698
699
700/**
701 * Used by TB code to load unsigned 64-bit data w/ segmentation.
702 */
703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
704{
705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
706 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
707#else
708 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
709#endif
710}
711
712
713#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
714/**
715 * Used by TB code to load 128-bit data w/ segmentation.
716 */
717IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
718{
719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
720 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
721#else
722 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
723#endif
724}
725
726
727/**
728 * Used by TB code to load 128-bit data w/ segmentation.
729 */
730IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
731{
732#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
733 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
734#else
735 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
736#endif
737}
738
739
740/**
741 * Used by TB code to load 128-bit data w/ segmentation.
742 */
743IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
744{
745#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
746 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
747#else
748 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
749#endif
750}
751
752
753/**
754 * Used by TB code to load 256-bit data w/ segmentation.
755 */
756IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
757{
758#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
759 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
760#else
761 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
762#endif
763}
764
765
766/**
767 * Used by TB code to load 256-bit data w/ segmentation.
768 */
769IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
770{
771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
772 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
773#else
774 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
775#endif
776}
777#endif
778
779
780/**
781 * Used by TB code to store unsigned 8-bit data w/ segmentation.
782 */
783IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
784{
785#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
786 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
787#else
788 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
789#endif
790}
791
792
793/**
794 * Used by TB code to store unsigned 16-bit data w/ segmentation.
795 */
796IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
797{
798#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
799 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
800#else
801 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
802#endif
803}
804
805
806/**
807 * Used by TB code to store unsigned 32-bit data w/ segmentation.
808 */
809IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
810{
811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
812 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
813#else
814 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
815#endif
816}
817
818
819/**
820 * Used by TB code to store unsigned 64-bit data w/ segmentation.
821 */
822IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
823{
824#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
825 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
826#else
827 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
828#endif
829}
830
831
832#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
833/**
834 * Used by TB code to store unsigned 128-bit data w/ segmentation.
835 */
836IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
837{
838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
839 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
840#else
841 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
842#endif
843}
844
845
846/**
847 * Used by TB code to store unsigned 128-bit data w/ segmentation.
848 */
849IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
850{
851#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
852 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
853#else
854 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
855#endif
856}
857
858
859/**
860 * Used by TB code to store unsigned 256-bit data w/ segmentation.
861 */
862IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
863{
864#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
865 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
866#else
867 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
868#endif
869}
870
871
872/**
873 * Used by TB code to store unsigned 256-bit data w/ segmentation.
874 */
875IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
876{
877#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
878 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
879#else
880 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
881#endif
882}
883#endif
884
885
886
887/**
888 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
889 */
890IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
891{
892#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
893 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
894#else
895 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
896#endif
897}
898
899
900/**
901 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
902 */
903IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
904{
905#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
906 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
907#else
908 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
909#endif
910}
911
912
913/**
914 * Used by TB code to store a 32-bit selector value onto a generic stack.
915 *
916 * Intel CPUs don't write the whole dword (only the low 16 bits of the selector), hence the special function.
917 */
918IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
919{
920#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
921 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
922#else
923 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
924#endif
925}
926
927
928/**
929 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
930 */
931IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
932{
933#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
934 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
935#else
936 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
937#endif
938}
939
940
941/**
942 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
943 */
944IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
945{
946#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
947 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
948#else
949 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
950#endif
951}
952
953
954/**
955 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
956 */
957IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
958{
959#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
960 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
961#else
962 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
963#endif
964}
965
966
967/**
968 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
969 */
970IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
971{
972#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
973 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
974#else
975 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
976#endif
977}
978
979
980
981/*********************************************************************************************************************************
982* Helpers: Flat memory fetches and stores. *
983*********************************************************************************************************************************/
984
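/*
 * Note: the flat variants pass UINT8_MAX as the segment register index to the
 * iemMemXxxSafeJmp workers, which is the IEM convention for "no segment",
 * i.e. flat addressing.
 */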
985/**
986 * Used by TB code to load unsigned 8-bit data w/ flat address.
987 * @note Zero extending the value to 64-bit to simplify assembly.
988 */
989IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
990{
991#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
992 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
993#else
994 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
995#endif
996}
997
998
999/**
1000 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1001 * to 16 bits.
1002 * @note Zero extending the value to 64-bit to simplify assembly.
1003 */
1004IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1005{
1006#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1007 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1008#else
1009 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1010#endif
1011}
1012
1013
1014/**
1015 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1016 * to 32 bits.
1017 * @note Zero extending the value to 64-bit to simplify assembly.
1018 */
1019IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1020{
1021#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1022 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1023#else
1024 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1025#endif
1026}
1027
1028
1029/**
1030 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1031 * to 64 bits.
1032 */
1033IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1034{
1035#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1036 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1037#else
1038 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1039#endif
1040}
1041
1042
1043/**
1044 * Used by TB code to load unsigned 16-bit data w/ flat address.
1045 * @note Zero extending the value to 64-bit to simplify assembly.
1046 */
1047IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1048{
1049#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1050 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1051#else
1052 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1053#endif
1054}
1055
1056
1057/**
1058 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1059 * to 32 bits.
1060 * @note Zero extending the value to 64-bit to simplify assembly.
1061 */
1062IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1063{
1064#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1065 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1066#else
1067 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1068#endif
1069}
1070
1071
1072/**
1073 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1074 * to 64 bits.
1075 * @note Zero extending the value to 64-bit to simplify assembly.
1076 */
1077IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1078{
1079#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1080 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1081#else
1082 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1083#endif
1084}
1085
1086
1087/**
1088 * Used by TB code to load unsigned 32-bit data w/ flat address.
1089 * @note Zero extending the value to 64-bit to simplify assembly.
1090 */
1091IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1092{
1093#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1094 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1095#else
1096 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1097#endif
1098}
1099
1100
1101/**
1102 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1103 * to 64 bits.
1104 * @note Zero extending the value to 64-bit to simplify assembly.
1105 */
1106IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1107{
1108#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1109 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1110#else
1111 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1112#endif
1113}
1114
1115
1116/**
1117 * Used by TB code to load unsigned 64-bit data w/ flat address.
1118 */
1119IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1120{
1121#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1122 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1123#else
1124 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1125#endif
1126}
1127
1128
1129#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1130/**
1131 * Used by TB code to load unsigned 128-bit data w/ flat address.
1132 */
1133IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1134{
1135#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1136 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1137#else
1138 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1139#endif
1140}
1141
1142
1143/**
1144 * Used by TB code to load unsigned 128-bit data w/ flat address.
1145 */
1146IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1147{
1148#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1149 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1150#else
1151 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1152#endif
1153}
1154
1155
1156/**
1157 * Used by TB code to load unsigned 128-bit data w/ flat address.
1158 */
1159IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1160{
1161#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1162 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1163#else
1164 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1165#endif
1166}
1167
1168
1169/**
1170 * Used by TB code to load unsigned 256-bit data w/ flat address.
1171 */
1172IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1173{
1174#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1175 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1176#else
1177 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1178#endif
1179}
1180
1181
1182/**
1183 * Used by TB code to load unsigned 256-bit data w/ flat address.
1184 */
1185IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1186{
1187#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1188 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1189#else
1190 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1191#endif
1192}
1193#endif
1194
1195
1196/**
1197 * Used by TB code to store unsigned 8-bit data w/ flat address.
1198 */
1199IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1200{
1201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1202 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1203#else
1204 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1205#endif
1206}
1207
1208
1209/**
1210 * Used by TB code to store unsigned 16-bit data w/ flat address.
1211 */
1212IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1213{
1214#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1215 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1216#else
1217 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1218#endif
1219}
1220
1221
1222/**
1223 * Used by TB code to store unsigned 32-bit data w/ flat address.
1224 */
1225IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1226{
1227#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1228 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1229#else
1230 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1231#endif
1232}
1233
1234
1235/**
1236 * Used by TB code to store unsigned 64-bit data w/ flat address.
1237 */
1238IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1239{
1240#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1241 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1242#else
1243 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1244#endif
1245}
1246
1247
1248#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1249/**
1250 * Used by TB code to store unsigned 128-bit data w/ flat address.
1251 */
1252IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1253{
1254#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1255 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1256#else
1257 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1258#endif
1259}
1260
1261
1262/**
1263 * Used by TB code to store unsigned 128-bit data w/ flat address.
1264 */
1265IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1266{
1267#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1268 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1269#else
1270 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1271#endif
1272}
1273
1274
1275/**
1276 * Used by TB code to store unsigned 256-bit data w/ flat address.
1277 */
1278IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1279{
1280#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1281 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1282#else
1283 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1284#endif
1285}
1286
1287
1288/**
1289 * Used by TB code to store unsigned 256-bit data w/ flat address.
1290 */
1291IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1292{
1293#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1294 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1295#else
1296 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1297#endif
1298}
1299#endif
1300
1301
1302
1303/**
1304 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1305 */
1306IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1307{
1308#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1309 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1310#else
1311 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1312#endif
1313}
1314
1315
1316/**
1317 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1318 */
1319IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1320{
1321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1322 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1323#else
1324 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1325#endif
1326}
1327
1328
1329/**
1330 * Used by TB code to store a segment selector value onto a flat stack.
1331 *
1332 * Intel CPUs don't write the whole dword (only the low 16 bits of the selector), hence the special function.
1333 */
1334IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1335{
1336#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1337 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1338#else
1339 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1340#endif
1341}
1342
1343
1344/**
1345 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1346 */
1347IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1348{
1349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1350 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1351#else
1352 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1353#endif
1354}
1355
1356
1357/**
1358 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1359 */
1360IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1361{
1362#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1363 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1364#else
1365 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1366#endif
1367}
1368
1369
1370/**
1371 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1372 */
1373IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1374{
1375#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1376 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1377#else
1378 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1379#endif
1380}
1381
1382
1383/**
1384 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1385 */
1386IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1387{
1388#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1389 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1390#else
1391 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1392#endif
1393}
1394
1395
1396
1397/*********************************************************************************************************************************
1398* Helpers: Segmented memory mapping. *
1399*********************************************************************************************************************************/
1400
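/*
 * Note: each mapping helper returns the host pointer to the guest data and
 * fills in *pbUnmapInfo with the token that has to be handed to the matching
 * commit/unmap helper once the access is done.
 */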
1401/**
1402 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1403 * segmentation.
1404 */
1405IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1406 RTGCPTR GCPtrMem, uint8_t iSegReg))
1407{
1408#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1409 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1410#else
1411 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1412#endif
1413}
1414
1415
1416/**
1417 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1418 */
1419IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1420 RTGCPTR GCPtrMem, uint8_t iSegReg))
1421{
1422#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1423 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1424#else
1425 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1426#endif
1427}
1428
1429
1430/**
1431 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1432 */
1433IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1434 RTGCPTR GCPtrMem, uint8_t iSegReg))
1435{
1436#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1437 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1438#else
1439 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1440#endif
1441}
1442
1443
1444/**
1445 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1446 */
1447IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1448 RTGCPTR GCPtrMem, uint8_t iSegReg))
1449{
1450#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1451 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1452#else
1453 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1454#endif
1455}
1456
1457
1458/**
1459 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1460 * segmentation.
1461 */
1462IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1463 RTGCPTR GCPtrMem, uint8_t iSegReg))
1464{
1465#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1466 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1467#else
1468 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1469#endif
1470}
1471
1472
1473/**
1474 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1475 */
1476IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1477 RTGCPTR GCPtrMem, uint8_t iSegReg))
1478{
1479#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1480 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1481#else
1482 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1483#endif
1484}
1485
1486
1487/**
1488 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1489 */
1490IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1491 RTGCPTR GCPtrMem, uint8_t iSegReg))
1492{
1493#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1494 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1495#else
1496 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1497#endif
1498}
1499
1500
1501/**
1502 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1503 */
1504IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1505 RTGCPTR GCPtrMem, uint8_t iSegReg))
1506{
1507#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1508 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1509#else
1510 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1511#endif
1512}
1513
1514
1515/**
1516 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1517 * segmentation.
1518 */
1519IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1520 RTGCPTR GCPtrMem, uint8_t iSegReg))
1521{
1522#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1523 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1524#else
1525 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1526#endif
1527}
1528
1529
1530/**
1531 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1532 */
1533IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1534 RTGCPTR GCPtrMem, uint8_t iSegReg))
1535{
1536#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1537 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1538#else
1539 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1540#endif
1541}
1542
1543
1544/**
1545 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1546 */
1547IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1548 RTGCPTR GCPtrMem, uint8_t iSegReg))
1549{
1550#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1551 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1552#else
1553 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1554#endif
1555}
1556
1557
1558/**
1559 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1560 */
1561IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1562 RTGCPTR GCPtrMem, uint8_t iSegReg))
1563{
1564#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1565 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1566#else
1567 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1568#endif
1569}
1570
1571
1572/**
1573 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1574 * segmentation.
1575 */
1576IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1577 RTGCPTR GCPtrMem, uint8_t iSegReg))
1578{
1579#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1580 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1581#else
1582 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1583#endif
1584}
1585
1586
1587/**
1588 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1591 RTGCPTR GCPtrMem, uint8_t iSegReg))
1592{
1593#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1594 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1595#else
1596 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1597#endif
1598}
1599
1600
1601/**
1602 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1603 */
1604IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1605 RTGCPTR GCPtrMem, uint8_t iSegReg))
1606{
1607#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1608 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1609#else
1610 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1611#endif
1612}
1613
1614
1615/**
1616 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1619 RTGCPTR GCPtrMem, uint8_t iSegReg))
1620{
1621#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1622 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1623#else
1624 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1625#endif
1626}
1627
1628
1629/**
1630 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1633 RTGCPTR GCPtrMem, uint8_t iSegReg))
1634{
1635#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1636 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1637#else
1638 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1639#endif
1640}
1641
1642
1643/**
1644 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1645 */
1646IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1647 RTGCPTR GCPtrMem, uint8_t iSegReg))
1648{
1649#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1650 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1651#else
1652 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1653#endif
1654}
1655
1656
1657/**
1658 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1659 * segmentation.
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1662 RTGCPTR GCPtrMem, uint8_t iSegReg))
1663{
1664#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1665 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1666#else
1667 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1668#endif
1669}
1670
1671
1672/**
1673 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1674 */
1675IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1676 RTGCPTR GCPtrMem, uint8_t iSegReg))
1677{
1678#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1679 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1680#else
1681 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1682#endif
1683}
1684
1685
1686/**
1687 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1688 */
1689IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1690 RTGCPTR GCPtrMem, uint8_t iSegReg))
1691{
1692#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1693 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1694#else
1695 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1696#endif
1697}
1698
1699
1700/**
1701 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1702 */
1703IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1704 RTGCPTR GCPtrMem, uint8_t iSegReg))
1705{
1706#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1707 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1708#else
1709 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1710#endif
1711}
1712
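/*
 * Note on the relationship to the flat helpers that follow (illustrative only):
 * under the IEMNATIVE_WITH_TLB_LOOKUP_MAPPED configuration the flat variants
 * simply call the same iemMemMapDataXxxSafeJmp workers with iSegReg = UINT8_MAX,
 * so a call like
 *
 *      pu32 = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem);
 *
 * behaves like the segmented form
 *
 *      pu32 = iemNativeHlpMemMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem, UINT8_MAX);
 */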
1713
1714/*********************************************************************************************************************************
1715* Helpers: Flat memory mapping. *
1716*********************************************************************************************************************************/
1717
1718/**
1719 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1720 * address.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1723{
1724#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1725 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1726#else
1727 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1728#endif
1729}
1730
1731
1732/**
1733 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1734 */
1735IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1736{
1737#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1738 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1739#else
1740 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1741#endif
1742}
1743
1744
1745/**
1746 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1747 */
1748IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1749{
1750#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1751 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1752#else
1753 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1754#endif
1755}
1756
1757
1758/**
1759 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1760 */
1761IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1762{
1763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1764 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1765#else
1766 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1767#endif
1768}
1769
1770
1771/**
1772 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1773 * address.
1774 */
1775IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1776{
1777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1778 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1779#else
1780 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1781#endif
1782}
1783
1784
1785/**
1786 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1787 */
1788IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1789{
1790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1791 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1792#else
1793 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1794#endif
1795}
1796
1797
1798/**
1799 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1800 */
1801IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1802{
1803#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1804 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1805#else
1806 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1807#endif
1808}
1809
1810
1811/**
1812 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1813 */
1814IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1815{
1816#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1817 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1818#else
1819 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1820#endif
1821}
1822
1823
1824/**
1825 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1826 * address.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1829{
1830#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1831 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1832#else
1833 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1834#endif
1835}
1836
1837
1838/**
1839 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1840 */
1841IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1842{
1843#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1844 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1845#else
1846 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1847#endif
1848}
1849
1850
1851/**
1852 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1853 */
1854IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1855{
1856#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1857 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1858#else
1859 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1860#endif
1861}
1862
1863
1864/**
1865 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1870 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1871#else
1872 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1873#endif
1874}
1875
1876
1877/**
1878 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1879 * address.
1880 */
1881IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1882{
1883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1884 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1885#else
1886 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1887#endif
1888}
1889
1890
1891/**
1892 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1895{
1896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1897 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1898#else
1899 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1900#endif
1901}
1902
1903
1904/**
1905 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1906 */
1907IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1908{
1909#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1910 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1911#else
1912 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1913#endif
1914}
1915
1916
1917/**
1918 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1919 */
1920IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1921{
1922#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1923 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1924#else
1925 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1926#endif
1927}
1928
1929
1930/**
1931 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1932 */
1933IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1934{
1935#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1936 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1937#else
1938 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1939#endif
1940}
1941
1942
1943/**
1944 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1945 */
1946IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1947{
1948#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1949 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1950#else
1951 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1952#endif
1953}
1954
1955
1956/**
1957 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1958 * address.
1959 */
1960IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1961{
1962#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1963 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1964#else
1965 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1966#endif
1967}
1968
1969
1970/**
1971 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1974{
1975#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1976 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1977#else
1978 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1979#endif
1980}
1981
1982
1983/**
1984 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1985 */
1986IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1987{
1988#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1989 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1990#else
1991 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1992#endif
1993}
1994
1995
1996/**
1997 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1998 */
1999IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2000{
2001#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2002 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2003#else
2004 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2005#endif
2006}
2007
2008
2009/*********************************************************************************************************************************
2010* Helpers: Commit, rollback & unmap *
2011*********************************************************************************************************************************/
2012
2013/**
2014 * Used by TB code to commit and unmap an atomic read-write memory mapping.

2015 */
2016IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2017{
2018 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2019}
2020
2021
2022/**
2023 * Used by TB code to commit and unmap a read-write memory mapping.
2024 */
2025IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2026{
2027 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2028}
2029
2030
2031/**
2032 * Used by TB code to commit and unmap a write-only memory mapping.
2033 */
2034IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2035{
2036 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2037}
2038
2039
2040/**
2041 * Used by TB code to commit and unmap a read-only memory mapping.
2042 */
2043IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2044{
2045 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2046}
2047
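/*
 * Illustrative sketch of how the mapping helpers and the commit/unmap helpers
 * above pair up.  This is not emitted code, just the C shape of the sequence a
 * translation block performs; bUnmapInfo is the opaque cookie filled in by the
 * map helper and consumed by the matching unmap helper, and fSomeBits is a
 * hypothetical stand-in value.
 *
 *      uint8_t   bUnmapInfo = 0;
 *      uint32_t *pu32Dst    = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem);
 *      *pu32Dst |= fSomeBits;                                   // access the mapped guest memory
 *      iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);      // commit the write and release the mapping
 */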
2048
2049/**
2050 * Reinitializes the native recompiler state.
2051 *
2052 * Called before starting a new recompile job.
2053 */
2054static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2055{
2056 pReNative->cLabels = 0;
2057 pReNative->bmLabelTypes = 0;
2058 pReNative->cFixups = 0;
2059 pReNative->cTbExitFixups = 0;
2060#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2061 pReNative->pDbgInfo->cEntries = 0;
2062 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2063#endif
2064 pReNative->pTbOrg = pTb;
2065 pReNative->cCondDepth = 0;
2066 pReNative->uCondSeqNo = 0;
2067 pReNative->uCheckIrqSeqNo = 0;
2068 pReNative->uTlbSeqNo = 0;
2069#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
2070 pReNative->fSkippingEFlags = 0;
2071#endif
2072#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2073 pReNative->PostponedEfl.fEFlags = 0;
2074 pReNative->PostponedEfl.enmOp = kIemNativePostponedEflOp_Invalid;
2075 pReNative->PostponedEfl.cOpBits = 0;
2076 pReNative->PostponedEfl.idxReg1 = UINT8_MAX;
2077 pReNative->PostponedEfl.idxReg2 = UINT8_MAX;
2078#endif
2079
2080#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2081 pReNative->Core.offPc = 0;
2082# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(VBOX_WITH_STATISTICS)
2083 pReNative->idxInstrPlusOneOfLastPcUpdate = 0;
2084# endif
2085# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2086 pReNative->Core.fDebugPcInitialized = false;
2087# endif
2088#endif
2089#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2090 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2091#endif
2092 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2093#if IEMNATIVE_HST_GREG_COUNT < 32
2094 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2095#endif
2096 ;
2097 pReNative->Core.bmHstRegsWithGstShadow = 0;
2098 pReNative->Core.bmGstRegShadows = 0;
2099#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2100 pReNative->Core.bmGstRegShadowDirty = 0;
2101#endif
2102 pReNative->Core.bmVars = 0;
2103 pReNative->Core.bmStack = 0;
2104 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2105 pReNative->Core.u64ArgVars = UINT64_MAX;
2106
2107 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 23);
2108 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2110 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2111 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2112 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2113 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2114 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2115 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2116 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2117 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2118 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2119 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2120 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2121 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2122 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2123 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2124 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2125 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2126 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2127 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2128 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2129 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2130 pReNative->aidxUniqueLabels[22] = UINT32_MAX;
2131
2132 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2133
2134 /* Full host register reinit: */
2135 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2136 {
2137 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2138 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2139 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2140 }
2141
2142 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2143 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2144#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2145 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2146#endif
2147#ifdef IEMNATIVE_REG_FIXED_TMP0
2148 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2149#endif
2150#ifdef IEMNATIVE_REG_FIXED_TMP1
2151 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2152#endif
2153#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2154 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2155#endif
2156 );
2157 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2158 {
2159 fRegs &= ~RT_BIT_32(idxReg);
2160 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2161 }
2162
2163 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2164#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2165 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2166#endif
2167#ifdef IEMNATIVE_REG_FIXED_TMP0
2168 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2169#endif
2170#ifdef IEMNATIVE_REG_FIXED_TMP1
2171 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2172#endif
2173#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2174 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2175#endif
2176
2177#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2178 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2179# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2180 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2181# endif
2182 ;
2183 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2184 pReNative->Core.bmGstSimdRegShadows = 0;
2185 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2186 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2187
2188 /* Full host register reinit: */
2189 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2190 {
2191 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2192 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2193 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2194 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2195 }
2196
2197 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2198 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2199 {
2200 fRegs &= ~RT_BIT_32(idxReg);
2201 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2202 }
2203
2204#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2205 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2206#endif
2207
2208#endif
2209
2210 return pReNative;
2211}
2212
2213
2214/**
2215 * Used when done emitting the per-chunk code and for iemNativeInit bailout.
2216 */
2217static void iemNativeTerm(PIEMRECOMPILERSTATE pReNative)
2218{
2219 RTMemFree(pReNative->pInstrBuf);
2220 RTMemFree(pReNative->paLabels);
2221 RTMemFree(pReNative->paFixups);
2222 RTMemFree(pReNative->paTbExitFixups);
2223#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2224 RTMemFree(pReNative->pDbgInfo);
2225#endif
2226 RTMemFree(pReNative);
2227}
2228
2229
2230/**
2231 * Allocates and initializes the native recompiler state.
2232 *
2233 * This is called the first time an EMT wants to recompile something.
2234 *
2235 * @returns Pointer to the new recompiler state.
2236 * @param pVCpu The cross context virtual CPU structure of the calling
2237 * thread.
2238 * @param pTb The TB that's about to be recompiled. When this is NULL,
2239 * the recompiler state is for emitting the common per-chunk
2240 * code from iemNativeRecompileAttachExecMemChunkCtx.
2241 * @thread EMT(pVCpu)
2242 */
2243static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2244{
2245 VMCPU_ASSERT_EMT(pVCpu);
2246
2247 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2248 AssertReturn(pReNative, NULL);
2249
2250 /*
2251 * Try allocate all the buffers and stuff we need.
2252 */
2253 uint32_t const cFactor = pTb ? 1 : 32 /* per-chunk stuff doesn't really need anything but the code buffer */;
2254 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2255 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K / cFactor);
2256 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K / cFactor);
2257 pReNative->paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemAllocZ(sizeof(IEMNATIVEEXITFIXUP) * _8K / cFactor);
2258#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2259 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K / cFactor]));
2260#endif
2261 if (RT_LIKELY( pReNative->pInstrBuf
2262 && pReNative->paLabels
2263 && pReNative->paFixups
2264 && pReNative->paTbExitFixups)
2265#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2266 && pReNative->pDbgInfo
2267#endif
2268 )
2269 {
2270 /*
2271 * Set the buffer & array sizes on success.
2272 */
2273 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2274 pReNative->cLabelsAlloc = _8K / cFactor;
2275 pReNative->cFixupsAlloc = _16K / cFactor;
2276 pReNative->cTbExitFixupsAlloc = _8K / cFactor;
2277#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2278 pReNative->cDbgInfoAlloc = _16K / cFactor;
2279#endif
2280
2281 /* Other constant stuff: */
2282 pReNative->pVCpu = pVCpu;
2283
2284 /*
2285 * Done, just reinit it.
2286 */
2287 return iemNativeReInit(pReNative, pTb);
2288 }
2289
2290 /*
2291 * Failed. Cleanup and return.
2292 */
2293 AssertFailed();
2294 iemNativeTerm(pReNative);
2295 return NULL;
2296}
2297
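/*
 * Rough usage sketch for the file-local state management trio above, assuming
 * the caller is the EMT owning pVCpu and pTb is an already threaded TB; pTbNext
 * is a hypothetical second translation block used only for illustration.
 *
 *      PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, pTb);    // first TB recompiled on this EMT
 *      ...
 *      pReNative = iemNativeReInit(pReNative, pTbNext);              // cheap reset before the next TB
 *      ...
 *      iemNativeTerm(pReNative);                                     // final cleanup (also the init bailout path)
 */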
2298
2299/**
2300 * Creates a label.
2301 *
2302 * If the label does not yet have a defined position,
2303 * call iemNativeLabelDefine() later to set it.
2304 *
2305 * @returns Label ID. Throws VBox status code on failure, so no need to check
2306 * the return value.
2307 * @param pReNative The native recompile state.
2308 * @param enmType The label type.
2309 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2310 * label is not yet defined (default).
2311 * @param uData Data associated with the label. Only applicable to
2312 * certain types of labels. Default is zero.
2313 */
2314DECL_HIDDEN_THROW(uint32_t)
2315iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2316 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2317{
2318 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2319#if defined(RT_ARCH_AMD64)
2320 Assert(enmType >= kIemNativeLabelType_LoopJumpTarget);
2321#endif
2322
2323 /*
2324 * Locate existing label definition.
2325 *
2326 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2327 * and uData is zero.
2328 */
2329 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2330 uint32_t const cLabels = pReNative->cLabels;
2331 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2332#ifndef VBOX_STRICT
2333 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2334 && offWhere == UINT32_MAX
2335 && uData == 0
2336#endif
2337 )
2338 {
2339#ifndef VBOX_STRICT
2340 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2341 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2342 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2343 if (idxLabel < pReNative->cLabels)
2344 return idxLabel;
2345#else
2346 for (uint32_t i = 0; i < cLabels; i++)
2347 if ( paLabels[i].enmType == enmType
2348 && paLabels[i].uData == uData)
2349 {
2350 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2351 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2352 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2353 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2354 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2355 return i;
2356 }
2357 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2358 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2359#endif
2360 }
2361
2362 /*
2363 * Make sure we've got room for another label.
2364 */
2365 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2366 { /* likely */ }
2367 else
2368 {
2369 uint32_t cNew = pReNative->cLabelsAlloc;
2370 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2371 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2372 cNew *= 2;
2373 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* The IEMNATIVEFIXUP::idxLabel type restricts this. */
2374 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2375 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2376 pReNative->paLabels = paLabels;
2377 pReNative->cLabelsAlloc = cNew;
2378 }
2379
2380 /*
2381 * Define a new label.
2382 */
2383 paLabels[cLabels].off = offWhere;
2384 paLabels[cLabels].enmType = enmType;
2385 paLabels[cLabels].uData = uData;
2386 pReNative->cLabels = cLabels + 1;
2387
2388 Assert((unsigned)enmType < 64);
2389 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2390
2391 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2392 {
2393 Assert(uData == 0);
2394 pReNative->aidxUniqueLabels[enmType] = cLabels;
2395 }
2396
2397 if (offWhere != UINT32_MAX)
2398 {
2399#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2400 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2401 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2402#endif
2403 }
2404 return cLabels;
2405}
2406
2407
2408/**
2409 * Defines the location of an existing label.
2410 *
2411 * @param pReNative The native recompile state.
2412 * @param idxLabel The label to define.
2413 * @param offWhere The position.
2414 */
2415DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2416{
2417 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2418 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2419 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2420 pLabel->off = offWhere;
2421#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2422 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2423 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2424#endif
2425}
2426
2427
2428/**
2429 * Looks up a label.
2430 *
2431 * @returns Label ID if found, UINT32_MAX if not.
2432 */
2433DECLHIDDEN(uint32_t) iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2434 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
2435{
2436 Assert((unsigned)enmType < 64);
2437 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2438 {
2439 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2440 return pReNative->aidxUniqueLabels[enmType];
2441
2442 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2443 uint32_t const cLabels = pReNative->cLabels;
2444 for (uint32_t i = 0; i < cLabels; i++)
2445 if ( paLabels[i].enmType == enmType
2446 && paLabels[i].uData == uData
2447 && ( paLabels[i].off == offWhere
2448 || offWhere == UINT32_MAX
2449 || paLabels[i].off == UINT32_MAX))
2450 return i;
2451 }
2452 return UINT32_MAX;
2453}
2454
2455
2456/**
2457 * Adds a fixup.
2458 *
2459 * @throws VBox status code (int) on failure.
2460 * @param pReNative The native recompile state.
2461 * @param offWhere The instruction offset of the fixup location.
2462 * @param idxLabel The target label ID for the fixup.
2463 * @param enmType The fixup type.
2464 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2465 */
2466DECL_HIDDEN_THROW(void)
2467iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2468 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2469{
2470 Assert(idxLabel <= UINT16_MAX);
2471 Assert((unsigned)enmType <= UINT8_MAX);
2472#ifdef RT_ARCH_ARM64
2473 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2474 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2475 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2476#endif
2477
2478 /*
2479 * Make sure we've got room.
2480 */
2481 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2482 uint32_t const cFixups = pReNative->cFixups;
2483 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2484 { /* likely */ }
2485 else
2486 {
2487 uint32_t cNew = pReNative->cFixupsAlloc;
2488 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2489 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2490 cNew *= 2;
2491 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2492 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2493 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2494 pReNative->paFixups = paFixups;
2495 pReNative->cFixupsAlloc = cNew;
2496 }
2497
2498 /*
2499 * Add the fixup.
2500 */
2501 paFixups[cFixups].off = offWhere;
2502 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2503 paFixups[cFixups].enmType = enmType;
2504 paFixups[cFixups].offAddend = offAddend;
2505 pReNative->cFixups = cFixups + 1;
2506}
2507
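/*
 * Sketch of the usual forward-branch pattern built on iemNativeLabelCreate,
 * iemNativeAddFixup and iemNativeLabelDefine.  Here enmSomeLabelType and
 * enmSomeFixupType are placeholders for concrete IEMNATIVELABELTYPE /
 * IEMNATIVEFIXUPTYPE values and off is the current instruction buffer offset.
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType);  // position stays UINT32_MAX for now
 *      iemNativeAddFixup(pReNative, off, idxLabel, enmSomeFixupType);                // remember the branch patch site
 *      ...emit the branch and the code leading up to the target, advancing off...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);                               // resolve the label to its final offset
 */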
2508
2509/**
2510 * Adds a fixup to the per-chunk tail code.
2511 *
2512 * @throws VBox status code (int) on failure.
2513 * @param pReNative The native recompile state.
2514 * @param offWhere The instruction offset of the fixup location.
2515 * @param enmExitReason The exit reason to jump to.
2516 */
2517DECL_HIDDEN_THROW(void)
2518iemNativeAddTbExitFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, IEMNATIVELABELTYPE enmExitReason)
2519{
2520 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
2521
2522 /*
2523 * Make sure we've got room.
2524 */
2525 PIEMNATIVEEXITFIXUP paTbExitFixups = pReNative->paTbExitFixups;
2526 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
2527 if (RT_LIKELY(cTbExitFixups < pReNative->cTbExitFixupsAlloc))
2528 { /* likely */ }
2529 else
2530 {
2531 uint32_t cNew = pReNative->cTbExitFixupsAlloc;
2532 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2533 AssertStmt(cTbExitFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2534 cNew *= 2;
2535 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2536 paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemRealloc(paTbExitFixups, cNew * sizeof(paTbExitFixups[0]));
2537 AssertStmt(paTbExitFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2538 pReNative->paTbExitFixups = paTbExitFixups;
2539 pReNative->cTbExitFixupsAlloc = cNew;
2540 }
2541
2542 /*
2543 * Add the fixup.
2544 */
2545 paTbExitFixups[cTbExitFixups].off = offWhere;
2546 paTbExitFixups[cTbExitFixups].enmExitReason = enmExitReason;
2547 pReNative->cTbExitFixups = cTbExitFixups + 1;
2548}
2549
2550
2551/**
2552 * Slow code path for iemNativeInstrBufEnsure.
2553 */
2554DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2555{
2556 /* Double the buffer size till we meet the request. */
2557 uint32_t cNew = pReNative->cInstrBufAlloc;
2558 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2559 do
2560 cNew *= 2;
2561 while (cNew < off + cInstrReq);
2562
2563 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2564#ifdef RT_ARCH_ARM64
2565 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2566#else
2567 uint32_t const cbMaxInstrBuf = _2M;
2568#endif
2569 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2570
2571 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2572 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2573
2574#ifdef VBOX_STRICT
2575 pReNative->offInstrBufChecked = off + cInstrReq;
2576#endif
2577 pReNative->cInstrBufAlloc = cNew;
2578 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2579}
2580
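/*
 * The fast-path wrapper, iemNativeInstrBufEnsure (declared in the recompiler
 * headers; signature assumed here to mirror the slow path), presumably only
 * drops into iemNativeInstrBufEnsureSlow when off + cInstrReq exceeds
 * cInstrBufAlloc.  Emitters then write at pCodeBuf[off] and advance off:
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
 *      pCodeBuf[off++] = ...;      // up to 4 instruction units may be written here
 */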
2581#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2582
2583/**
2584 * Grows the static debug info array used during recompilation.
2585 *
2586 * @returns Pointer to the new debug info block; throws VBox status code on
2587 * failure, so no need to check the return value.
2588 */
2589DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2590{
2591 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2592 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2593 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2594 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2595 pReNative->pDbgInfo = pDbgInfo;
2596 pReNative->cDbgInfoAlloc = cNew;
2597 return pDbgInfo;
2598}
2599
2600
2601/**
2602 * Adds a new debug info uninitialized entry, returning the pointer to it.
2603 */
2604DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2605{
2606 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2607 { /* likely */ }
2608 else
2609 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2610 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2611}
2612
2613
2614/**
2615 * Debug Info: Adds a native offset record, if necessary.
2616 */
2617DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2618{
2619 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2620
2621 /*
2622 * Do we need this one?
2623 */
2624 uint32_t const offPrev = pDbgInfo->offNativeLast;
2625 if (offPrev == off)
2626 return;
2627 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2628
2629 /*
2630 * Add it.
2631 */
2632 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2633 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2634 pEntry->NativeOffset.offNative = off;
2635 pDbgInfo->offNativeLast = off;
2636}
2637
2638
2639/**
2640 * Debug Info: Record info about a label.
2641 */
2642static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2643{
2644 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2645 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2646 pEntry->Label.uUnused = 0;
2647 pEntry->Label.enmLabel = (uint8_t)enmType;
2648 pEntry->Label.uData = uData;
2649}
2650
2651
2652/**
2653 * Debug Info: Record info about a threaded call.
2654 */
2655static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2656{
2657 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2658 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2659 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2660 pEntry->ThreadedCall.uUnused = 0;
2661 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2662}
2663
2664
2665/**
2666 * Debug Info: Record info about a new guest instruction.
2667 */
2668static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2669{
2670 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2671 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2672 pEntry->GuestInstruction.uUnused = 0;
2673 pEntry->GuestInstruction.fExec = fExec;
2674}
2675
2676
2677/**
2678 * Debug Info: Record info about guest register shadowing.
2679 */
2680DECL_HIDDEN_THROW(void)
2681iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2682 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2683{
2684 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2685 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2686 pEntry->GuestRegShadowing.uUnused = 0;
2687 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2688 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2689 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2690#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2691 Assert( idxHstReg != UINT8_MAX
2692 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2693#endif
2694}
2695
2696
2697# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2698/**
2699 * Debug Info: Record info about guest SIMD register shadowing.
2700 */
2701DECL_HIDDEN_THROW(void)
2702iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2703 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2704{
2705 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2706 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2707 pEntry->GuestSimdRegShadowing.uUnused = 0;
2708 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2709 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2710 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2711}
2712# endif
2713
2714
2715# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2716/**
2717 * Debug Info: Record info about delayed RIP updates.
2718 */
2719DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint64_t offPc, uint32_t cInstrSkipped)
2720{
2721 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2722 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2723 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2724 pEntry->DelayedPcUpdate.offPc = offPc; /** @todo support larger values */
2725}
2726# endif
2727
2728# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2729
2730/**
2731 * Debug Info: Record info about a dirty guest register.
2732 */
2733DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2734 uint8_t idxGstReg, uint8_t idxHstReg)
2735{
2736 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2737 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2738 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2739 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2740 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2741}
2742
2743
2744/**
2745 * Debug Info: Record info about a dirty guest register writeback operation.
2746 */
2747DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2748{
2749 unsigned const cBitsGstRegMask = 25;
2750 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2751
2752 /* The first block of 25 bits: */
2753 if (fGstReg & fGstRegMask)
2754 {
2755 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2756 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2757 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2758 pEntry->GuestRegWriteback.cShift = 0;
2759 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2760 fGstReg &= ~(uint64_t)fGstRegMask;
2761 if (!fGstReg)
2762 return;
2763 }
2764
2765 /* The second block of 25 bits: */
2766 fGstReg >>= cBitsGstRegMask;
2767 if (fGstReg & fGstRegMask)
2768 {
2769 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2770 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2771 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2772 pEntry->GuestRegWriteback.cShift = 1;
2773 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2774 fGstReg &= ~(uint64_t)fGstRegMask;
2775 if (!fGstReg)
2776 return;
2777 }
2778
2779 /* The last block with 14 bits: */
2780 fGstReg >>= cBitsGstRegMask;
2781 Assert(fGstReg & fGstRegMask);
2782 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2783 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2784 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2785 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2786 pEntry->GuestRegWriteback.cShift = 2;
2787 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2788}
2789
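/*
 * A 64-bit guest register mask thus ends up in one to three GuestRegWriteback
 * entries holding 25+25+14 bits, with cShift giving the block index.  A debug
 * info consumer can reassemble the mask roughly like this (sketch only):
 *
 *      uint64_t fGstRegs = 0;
 *      // for each GuestRegWriteback entry belonging to the same flush:
 *          fGstRegs |= (uint64_t)pEntry->GuestRegWriteback.fGstReg
 *                   << (pEntry->GuestRegWriteback.cShift * 25);
 */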
2790# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2791
2792# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2793/**
2794 * Debug Info: Record info about emitting a postponed EFLAGS calculation.
2795 */
2796DECL_HIDDEN_THROW(void)
2797iemNativeDbgInfoAddPostponedEFlagsCalc(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVE_POSTPONED_EFL_OP_T enmOp,
2798 uint8_t cOpBits, uint8_t idxEmit)
2799{
2800 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2801 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2802 pEntry->PostponedEflCalc.uType = kIemTbDbgEntryType_PostponedEFlagsCalc;
2803 pEntry->PostponedEflCalc.enmOp = (unsigned)enmOp;
2804 pEntry->PostponedEflCalc.cOpBits = cOpBits;
2805 pEntry->PostponedEflCalc.idxEmit = idxEmit;
2806 pEntry->PostponedEflCalc.uUnused = 0;
2807}
2808# endif /* IEMNATIVE_WITH_EFLAGS_POSTPONING */
2809
2810#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2811
2812
2813/*********************************************************************************************************************************
2814* Register Allocator *
2815*********************************************************************************************************************************/
2816
2817/**
2818 * Register parameter indexes (indexed by argument number).
2819 */
2820DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2821{
2822 IEMNATIVE_CALL_ARG0_GREG,
2823 IEMNATIVE_CALL_ARG1_GREG,
2824 IEMNATIVE_CALL_ARG2_GREG,
2825 IEMNATIVE_CALL_ARG3_GREG,
2826#if defined(IEMNATIVE_CALL_ARG4_GREG)
2827 IEMNATIVE_CALL_ARG4_GREG,
2828# if defined(IEMNATIVE_CALL_ARG5_GREG)
2829 IEMNATIVE_CALL_ARG5_GREG,
2830# if defined(IEMNATIVE_CALL_ARG6_GREG)
2831 IEMNATIVE_CALL_ARG6_GREG,
2832# if defined(IEMNATIVE_CALL_ARG7_GREG)
2833 IEMNATIVE_CALL_ARG7_GREG,
2834# endif
2835# endif
2836# endif
2837#endif
2838};
2839AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2840
2841/**
2842 * Call register masks indexed by argument count.
2843 */
2844DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2845{
2846 0,
2847 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2848 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2849 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2850 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2851 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2852#if defined(IEMNATIVE_CALL_ARG4_GREG)
2853 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2854 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2855# if defined(IEMNATIVE_CALL_ARG5_GREG)
2856 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2857 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2858# if defined(IEMNATIVE_CALL_ARG6_GREG)
2859 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2860 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2861 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2862# if defined(IEMNATIVE_CALL_ARG7_GREG)
2863 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2864 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2865 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2866# endif
2867# endif
2868# endif
2869#endif
2870};
2871
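/*
 * What the two tables above are used for, in sketch form (idxArg and cArgs are
 * just local placeholders): g_aidxIemNativeCallRegs maps an argument number to
 * the host register carrying that argument, while g_afIemNativeCallRegs[cArgs]
 * is the combined mask of the registers used for the first cArgs arguments.
 *
 *      uint8_t const  idxArgReg = g_aidxIemNativeCallRegs[idxArg];   // idxArg < IEMNATIVE_CALL_ARG_GREG_COUNT
 *      uint32_t const fArgRegs  = g_afIemNativeCallRegs[cArgs];      // cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT
 */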
2872#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2873/**
2874 * BP offset of the stack argument slots.
2875 *
2876 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2877 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2878 */
2879DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2880{
2881 IEMNATIVE_FP_OFF_STACK_ARG0,
2882# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2883 IEMNATIVE_FP_OFF_STACK_ARG1,
2884# endif
2885# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2886 IEMNATIVE_FP_OFF_STACK_ARG2,
2887# endif
2888# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2889 IEMNATIVE_FP_OFF_STACK_ARG3,
2890# endif
2891};
2892AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2893#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2894
2895/**
2896 * Info about shadowed guest register values.
2897 * @see IEMNATIVEGSTREG
2898 */
2899DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2900{
2901#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2902 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2903 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2904 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2905 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2906 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2907 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2908 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2909 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2910 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2911 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2912 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2913 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2914 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2915 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2916 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2917 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2918 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2919 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2920 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2921 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2922 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2923 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2924 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2925 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2926 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2927 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2928 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2929 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2930 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2931 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2932 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2933 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2934 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2935 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2936 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2937 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2938 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2939 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2940 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2941 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2942 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2943 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2944 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2945 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2946 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2947 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2948 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2949 /* [kIemNativeGstReg_EFlags.Cf] = */ { UINT32_MAX, 0, "efl.cf", },
2950 /* [kIemNativeGstReg_EFlags.Of] = */ { UINT32_MAX, 0, "efl.of", },
2951 /* [kIemNativeGstReg_EFlags.Af] = */ { UINT32_MAX, 0, "efl.af", },
2952 /* [kIemNativeGstReg_EFlags.Zf] = */ { UINT32_MAX, 0, "efl.zf", },
2953 /* [kIemNativeGstReg_EFlags.Sf] = */ { UINT32_MAX, 0, "efl.sf", },
2954 /* [kIemNativeGstReg_EFlags.Of] = */ { UINT32_MAX, 0, "efl.of", },
2955 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2956#undef CPUMCTX_OFF_AND_SIZE
2957};
2958AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2959
2960
2961/** Host CPU general purpose register names. */
2962DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2963{
2964#ifdef RT_ARCH_AMD64
2965 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2966#elif defined(RT_ARCH_ARM64)
2967 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2968 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2969#else
2970# error "port me"
2971#endif
2972};
2973
2974
2975#if 0 /* unused */
2976/**
2977 * Tries to locate a suitable register in the given register mask.
2978 *
2979 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2980 * failed.
2981 *
2982 * @returns Host register number on success, returns UINT8_MAX on failure.
2983 */
2984static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2985{
2986 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2987 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2988 if (fRegs)
2989 {
2990 /** @todo pick better here: */
2991 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2992
2993 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2994 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2995 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2996 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2997
2998 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2999 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3000 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3001 return idxReg;
3002 }
3003 return UINT8_MAX;
3004}
3005#endif /* unused */
3006
3007#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3008
3009/**
3010 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
3011 *
3012 * @returns New code buffer offset on success, UINT32_MAX on failure.
3013 * @param pReNative The native recompile state.
3014 * @param off The current code buffer position.
3015 * @param enmGstReg The guest register to store to.
3016 * @param idxHstReg The host register to store from.
3017 */
3018DECL_FORCE_INLINE_THROW(uint32_t)
3019iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
3020{
3021 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3022 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3023
3024 switch (g_aGstShadowInfo[enmGstReg].cb)
3025 {
3026 case sizeof(uint64_t):
3027 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3028 case sizeof(uint32_t):
3029 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3030 case sizeof(uint16_t):
3031 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3032# if 0 /* not present in the table. */
3033 case sizeof(uint8_t):
3034 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3035# endif
3036 default:
3037 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3038 }
3039}
3040
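/* Hedged usage sketch (illustrative only): the store width is picked purely from
   g_aGstShadowInfo[].cb, so flushing a GPR shadow emits an 8-byte store while a
   segment selector shadow gets a 2-byte store. The host register indexes are made up. */
# if 0
    off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, kIemNativeGstReg_GprFirst,    3 /*idxHstReg*/);
    off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, kIemNativeGstReg_SegSelFirst, 5 /*idxHstReg*/);
# endif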
3041
3042/**
3043 * Emits code to flush a pending write of the given guest register,
3044 * version with alternative core state.
3045 *
3046 * @returns New code buffer offset.
3047 * @param pReNative The native recompile state.
3048 * @param off Current code buffer position.
3049 * @param pCore Alternative core state.
3050 * @param enmGstReg The guest register to flush.
3051 */
3052DECL_HIDDEN_THROW(uint32_t)
3053iemNativeRegFlushPendingWriteEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVECORESTATE pCore, IEMNATIVEGSTREG enmGstReg)
3054{
3055 uint8_t const idxHstReg = pCore->aidxGstRegShadows[enmGstReg];
3056
3057 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3058 && enmGstReg <= kIemNativeGstReg_GprLast)
3059 || enmGstReg == kIemNativeGstReg_MxCsr);
3060 Assert( idxHstReg != UINT8_MAX
3061 && pCore->bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3062 Log12(("iemNativeRegFlushPendingWriteEx: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3063 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3064
3065 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3066
3067 pCore->bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3068 return off;
3069}
3070
3071
3072/**
3073 * Emits code to flush a pending write of the given guest register.
3074 *
3075 * @returns New code buffer offset.
3076 * @param pReNative The native recompile state.
3077 * @param off Current code buffer position.
3078 * @param enmGstReg The guest register to flush.
3079 */
3080DECL_HIDDEN_THROW(uint32_t)
3081iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
3082{
3083 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3084
3085 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3086 && enmGstReg <= kIemNativeGstReg_GprLast)
3087 || enmGstReg == kIemNativeGstReg_MxCsr);
3088 Assert( idxHstReg != UINT8_MAX
3089 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3090 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3091 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3092
3093 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3094
3095 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3096 return off;
3097}
3098
3099
3100/**
3101 * Flush the given set of guest registers if marked as dirty.
3102 *
3103 * @returns New code buffer offset.
3104 * @param pReNative The native recompile state.
3105 * @param off Current code buffer position.
3106 * @param fFlushGstReg The guest register set to flush (default is flush everything).
3107 * @note Must not modify the host status flags!
3108 */
3109DECL_HIDDEN_THROW(uint32_t)
3110iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
3111{
3112 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
3113 if (bmGstRegShadowDirty)
3114 {
3115# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3116 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3117 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
3118# endif
3119 do
3120 {
3121 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3122 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3123 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3124 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3125 } while (bmGstRegShadowDirty);
3126 }
3127
3128 return off;
3129}
3130
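/* Hedged example (mask is illustrative): flush only the dirty GPR shadows back to
   CPUMCTX before emitting something that may inspect the guest context, leaving any
   dirty MXCSR shadow postponed for later. */
# if 0
    off = iemNativeRegFlushDirtyGuest(pReNative, off,
                                      RT_BIT_64(kIemNativeGstReg_GprLast + 1) - RT_BIT_64(kIemNativeGstReg_GprFirst));
# endif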
3131
3132/**
3133 * Flush all shadowed guest registers marked as dirty for the given host register.
3134 *
3135 * @returns New code buffer offset.
3136 * @param pReNative The native recompile state.
3137 * @param off Current code buffer position.
3138 * @param idxHstReg The host register.
3139 *
3140 * @note This doesn't do any unshadowing of guest registers from the host register.
3141 *
3142 * @note Must not modify the host status flags!
3143 */
3144DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3145{
3146 /* We need to flush any pending guest register writes this host register shadows. */
3147 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3148 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3149 {
3150# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3151 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3152 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3153# endif
3154 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3155 do
3156 {
3157 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3158 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3159 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3160 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3161 } while (bmGstRegShadowDirty);
3162 }
3163
3164 return off;
3165}
3166
3167#endif /* IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK */
3168
3169
3170/**
3171 * Locate a register, possibly freeing one up.
3172 *
3173 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3174 * failed.
3175 *
3176 * @returns Host register number on success. Returns UINT8_MAX if no registers
3177 * were found; the caller is supposed to deal with this and raise an
3178 * allocation type specific status code (if desired).
3179 *
3180 * @throws VBox status code if we run into trouble spilling a variable or
3181 * recording debug info. Does NOT throw anything if we're out of
3182 * registers, though.
3183 *
3184 * @note Must not modify the host status flags!
3185 */
3186static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3187 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3188{
3189 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3190 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3191 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3192
3193 /*
3194 * Try a freed register that's shadowing a guest register.
3195 */
3196 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3197 if (fRegs)
3198 {
3199 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3200
3201#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3202 /*
3203 * When we have liveness information, we use it to kick out all shadowed
3204 * guest registers that will not be needed any more in this TB. If we're
3205 * lucky, this may prevent us from ending up here again.
3206 *
3207 * Note! We must consider the previous entry here so we don't free
3208 * anything that the current threaded function requires (current
3209 * entry is produced by the next threaded function).
3210 */
3211 uint32_t const idxCurCall = pReNative->idxCurCall;
3212 if (idxCurCall > 0)
3213 {
3214 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3215 uint64_t fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
3216
3217 /* Merge EFLAGS. */
3218 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3219 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3220 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3221 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3222 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
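            /* I.e. the single kIemNativeGstReg_EFlags bit survives in fToFreeMask only if all
               seven EFLAGS liveness bits (the 'other' state and each individual status flag)
               agree that the shadow can be freed; the AND-shift cascade folds them into one bit. */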
3223
3224 /* If it matches any shadowed registers. */
3225 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3226 {
3227#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3228 /* Writeback any dirty shadow registers we are about to unshadow. */
3229 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3230#endif
3231
3232 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3233 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3234 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3235
3236 /* See if we've got any unshadowed registers we can return now. */
3237 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3238 if (fUnshadowedRegs)
3239 {
3240 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3241 return (fPreferVolatile
3242 ? ASMBitFirstSetU32(fUnshadowedRegs)
3243 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3244 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3245 - 1;
3246 }
3247 }
3248 }
3249#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3250
3251 unsigned const idxReg = (fPreferVolatile
3252 ? ASMBitFirstSetU32(fRegs)
3253 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3254 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3255 - 1;
3256
3257 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3258 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3259 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3260 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3261
3262#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3263 /* We need to flush any pending guest register writes this host register shadows. */
3264 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3265#endif
3266
3267 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3268 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3269 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3270 return idxReg;
3271 }
3272
3273 /*
3274 * Try free up a variable that's in a register.
3275 *
3276 * We do two rounds here, first evacuating variables that don't need to be
3277 * saved on the stack, then in the second round moving things to the stack.
3278 */
3279 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3280 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3281 {
3282 uint32_t fVars = pReNative->Core.bmVars;
3283 while (fVars)
3284 {
3285 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3286 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3287#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3288 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
3289 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit before skipping, or this loop would never terminate. */
3290#endif
3291
3292 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3293 && (RT_BIT_32(idxReg) & fRegMask)
3294 && ( iLoop == 0
3295 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3296 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3297 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3298 {
3299 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3300 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3301 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3302 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3303 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3304 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3305#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3306 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3307#endif
3308
3309 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3310 {
3311 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3312 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3313 }
3314
3315 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3316 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3317
3318 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3319 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3320 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3321 return idxReg;
3322 }
3323 fVars &= ~RT_BIT_32(idxVar);
3324 }
3325 }
3326
3327 return UINT8_MAX;
3328}
3329
3330
3331/**
3332 * Reassigns a variable to a different register specified by the caller.
3333 *
3334 * @returns The new code buffer position.
3335 * @param pReNative The native recompile state.
3336 * @param off The current code buffer position.
3337 * @param idxVar The variable index.
3338 * @param idxRegOld The old host register number.
3339 * @param idxRegNew The new host register number.
3340 * @param pszCaller The caller for logging.
3341 */
3342static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3343 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3344{
3345 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3346 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3347#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3348 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3349#endif
3350 RT_NOREF(pszCaller);
3351
3352#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3353 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3354#endif
3355 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3356
3357 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3358#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3359 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3360#endif
3361 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3362 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3363 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3364
3365 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3366 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3367 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3368 if (fGstRegShadows)
3369 {
3370 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3371 | RT_BIT_32(idxRegNew);
3372 while (fGstRegShadows)
3373 {
3374 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3375 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3376
3377 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3378 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3379 }
3380 }
3381
3382 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3383 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3384 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3385 return off;
3386}
3387
3388
3389/**
3390 * Moves a variable to a different register or spills it onto the stack.
3391 *
3392 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3393 * kinds can easily be recreated if needed later.
3394 *
3395 * @returns The new code buffer position.
3396 * @param pReNative The native recompile state.
3397 * @param off The current code buffer position.
3398 * @param idxVar The variable index.
3399 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3400 * call-volatile registers.
3401 */
3402DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3403 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3404{
3405 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3406 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3407 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3408 Assert(!pVar->fRegAcquired);
3409
3410 uint8_t const idxRegOld = pVar->idxReg;
3411 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3412 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3413 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3414 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3415 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3416 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3417 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3418 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3419#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3420 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3421#endif
3422
3423
3424 /** @todo Add statistics on this.*/
3425 /** @todo Implement basic variable liveness analysis (python) so variables
3426 * can be freed immediately once no longer used. Without it we risk
3427 * trashing registers and stack for dead variables.
3428 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3429
3430 /*
3431 * First try to move it to a different register, as that's cheaper.
3432 */
3433 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3434 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3435 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3436 if (fRegs)
3437 {
3438 /* Avoid using shadow registers, if possible. */
3439 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3440 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3441 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3442 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3443 }
3444
3445 /*
3446 * Otherwise we must spill the register onto the stack.
3447 */
3448 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3449 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3450 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3451 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3452
3453 pVar->idxReg = UINT8_MAX;
3454 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3455 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3456 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3457 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3458 return off;
3459}
3460
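/* Hedged example: before emitting a helper call, evict a stack variable from its
   call-volatile register so the call cannot clobber it; with the default forbidden
   mask it either moves to a non-volatile register or gets spilled to its stack slot. */
#if 0
    off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar /*packed variable index*/);
#endif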
3461
3462/**
3463 * Allocates a temporary host general purpose register.
3464 *
3465 * This may emit code to save register content onto the stack in order to free
3466 * up a register.
3467 *
3468 * @returns The host register number; throws VBox status code on failure,
3469 * so no need to check the return value.
3470 * @param pReNative The native recompile state.
3471 * @param poff Pointer to the variable with the code buffer position.
3472 * This will be updated if we need to move a variable from
3473 * register to stack in order to satisfy the request.
3474 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3475 * registers (@c true, default) or the other way around
3476 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3477 *
3478 * @note Must not modify the host status flags!
3479 */
3480DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3481{
3482 /*
3483 * Try find a completely unused register, preferably a call-volatile one.
3484 */
3485 uint8_t idxReg;
3486 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3487 & ~pReNative->Core.bmHstRegsWithGstShadow
3488 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3489 if (fRegs)
3490 {
3491 if (fPreferVolatile)
3492 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3493 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3494 else
3495 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3496 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3497 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3498 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3499 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3500 }
3501 else
3502 {
3503 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3504 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3505 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3506 }
3507 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3508}
3509
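/* Hedged usage sketch (illustrative only): grab a scratch GPR, load a constant into
   it with one of the emitters, use it, and hand it back. */
#if 0
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0xdeadbeef));
    /* ... further emitters using idxTmpReg ... */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
#endif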
3510
3511/**
3512 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3513 * registers.
3514 *
3515 * @returns The host register number; throws VBox status code on failure,
3516 * so no need to check the return value.
3517 * @param pReNative The native recompile state.
3518 * @param poff Pointer to the variable with the code buffer position.
3519 * This will be updated if we need to move a variable from
3520 * register to stack in order to satisfy the request.
3521 * @param fRegMask Mask of acceptable registers.
3522 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3523 * registers (@c true, default) or the other way around
3524 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3525 */
3526DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3527 bool fPreferVolatile /*= true*/)
3528{
3529 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3530 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3531
3532 /*
3533 * Try find a completely unused register, preferably a call-volatile one.
3534 */
3535 uint8_t idxReg;
3536 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3537 & ~pReNative->Core.bmHstRegsWithGstShadow
3538 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3539 & fRegMask;
3540 if (fRegs)
3541 {
3542 if (fPreferVolatile)
3543 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3544 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3545 else
3546 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3547 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3548 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3549 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3550 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3551 }
3552 else
3553 {
3554 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3555 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3556 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3557 }
3558 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3559}
3560
3561
3562/**
3563 * Allocates a temporary register for loading an immediate value into.
3564 *
3565 * This will emit code to load the immediate, unless there happens to be an
3566 * unused register with the value already loaded.
3567 *
3568 * The caller must not modify the returned register; it is to be considered
3569 * read-only. Free it using iemNativeRegFreeTmpImm.
3570 *
3571 * @returns The host register number; throws VBox status code on failure, so no
3572 * need to check the return value.
3573 * @param pReNative The native recompile state.
3574 * @param poff Pointer to the variable with the code buffer position.
3575 * @param uImm The immediate value that the register must hold upon
3576 * return.
3577 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3578 * registers (@c true, default) or the other way around
3579 * (@c false).
3580 *
3581 * @note Reusing immediate values has not been implemented yet.
3582 */
3583DECL_HIDDEN_THROW(uint8_t)
3584iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3585{
3586 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3587 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3588 return idxReg;
3589}
3590
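/* Hedged example (value illustrative): materialize a constant in a read-only temp
   and release it with the matching Imm free function once it's no longer needed. */
#if 0
    uint8_t const idxConstReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
    /* ... emitters that only read idxConstReg ... */
    iemNativeRegFreeTmpImm(pReNative, idxConstReg);
#endif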
3591
3592/**
3593 * Common worker for iemNativeRegAllocTmpForGuestReg() and
3594 * iemNativeRegAllocTmpForGuestEFlags().
3595 *
3596 * See iemNativeRegAllocTmpForGuestReg() for details.
3597 */
3598static uint8_t
3599iemNativeRegAllocTmpForGuestRegCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3600 IEMNATIVEGSTREGUSE enmIntendedUse, bool fNoVolatileRegs)
3601{
3602 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3603#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3604 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3605#endif
3606 uint32_t const fRegMask = !fNoVolatileRegs
3607 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3608 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3609
3610 /*
3611 * First check if the guest register value is already in a host register.
3612 */
3613 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3614 {
3615 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3616 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3617 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3618 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3619
3620 /* It's not supposed to be allocated... */
3621 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3622 {
3623 /*
3624 * If the register will trash the guest shadow copy, try find a
3625 * completely unused register we can use instead. If that fails,
3626 * we need to disassociate the host reg from the guest reg.
3627 */
3628 /** @todo would be nice to know if preserving the register is in any way helpful. */
3629 /* If the purpose is calculations, try to duplicate the register value as
3630 we'll be clobbering the shadow. */
3631 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3632 && ( ~pReNative->Core.bmHstRegs
3633 & ~pReNative->Core.bmHstRegsWithGstShadow
3634 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3635 {
3636 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3637
3638 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3639
3640 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3641 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3642 g_apszIemNativeHstRegNames[idxRegNew]));
3643 idxReg = idxRegNew;
3644 }
3645 /* If the current register matches the restrictions, go ahead and allocate
3646 it for the caller. */
3647 else if (fRegMask & RT_BIT_32(idxReg))
3648 {
3649 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3650 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3651 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3652 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3653 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3654 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3655 else
3656 {
3657 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3658 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3659 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3660 }
3661 }
3662 /* Otherwise, allocate a register that satisfies the caller and transfer
3663 the shadowing if compatible with the intended use. (This basically
3664 means the caller wants a non-volatile register (RSP push/pop scenario).) */
3665 else
3666 {
3667 Assert(fNoVolatileRegs);
3668 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3669 !fNoVolatileRegs
3670 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3671 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3672 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3673 {
3674 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3675 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3676 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3677 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3678 }
3679 else
3680 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3681 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3682 g_apszIemNativeHstRegNames[idxRegNew]));
3683 idxReg = idxRegNew;
3684 }
3685 }
3686 else
3687 {
3688 /*
3689 * Oops. Shadowed guest register already allocated!
3690 *
3691 * Allocate a new register, copy the value and, if updating, the
3692 * guest shadow copy assignment to the new register.
3693 */
3694 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3695 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3696 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3697 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3698
3699 /** @todo share register for readonly access. */
3700 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3701 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3702
3703 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3704 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3705
3706 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3707 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3708 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3709 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3710 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3711 else
3712 {
3713 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3714 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3715 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3716 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3717 }
3718 idxReg = idxRegNew;
3719 }
3720 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3721
3722#ifdef VBOX_STRICT
3723 /* Strict builds: Check that the value is correct. */
3724 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3725#endif
3726
3727#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3728 /** @todo r=aeichner Implement for registers other than GPR as well. */
3729 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3730 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3731 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3732 && enmGstReg <= kIemNativeGstReg_GprLast)
3733 || enmGstReg == kIemNativeGstReg_MxCsr))
3734 {
3735# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3736 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3737 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3738# endif
3739 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3740 }
3741#endif
3742
3743 return idxReg;
3744 }
3745
3746 /*
3747 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3748 */
3749 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3750
3751 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3752 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3753
3754 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3755 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3756 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3757 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3758
3759#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3760 /** @todo r=aeichner Implement for registers other than GPR as well. */
3761 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3762 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3763 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3764 && enmGstReg <= kIemNativeGstReg_GprLast)
3765 || enmGstReg == kIemNativeGstReg_MxCsr))
3766 {
3767# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3768 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3769 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3770# endif
3771 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3772 }
3773#endif
3774
3775 return idxRegNew;
3776}
3777
3778
3779/**
3780 * Allocates a temporary host general purpose register for keeping a guest
3781 * register value.
3782 *
3783 * Since we may already have a register holding the guest register value,
3784 * code will be emitted to do the loading if that's not the case. Code may also
3785 * be emitted if we have to free up a register to satisfy the request.
3786 *
3787 * @returns The host register number; throws VBox status code on failure, so no
3788 * need to check the return value.
3789 * @param pReNative The native recompile state.
3790 * @param poff Pointer to the variable with the code buffer
3791 * position. This will be updated if we need to move a
3792 * variable from register to stack in order to satisfy
3793 * the request.
3794 * @param enmGstReg The guest register that is to be updated.
3795 * @param enmIntendedUse How the caller will be using the host register.
3796 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3797 * register is okay (default). The ASSUMPTION here is
3798 * that the caller has already flushed all volatile
3799 * registers, so this is only applied if we allocate a
3800 * new register.
3801 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3802 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3803 */
3804DECL_HIDDEN_THROW(uint8_t)
3805iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3806 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3807 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3808{
3809#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3810 AssertMsg( fSkipLivenessAssert
3811 || pReNative->idxCurCall == 0
3812 || enmGstReg == kIemNativeGstReg_Pc
3813 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3814 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3815 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3816 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3817 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3818 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3819#endif
3820 RT_NOREF(fSkipLivenessAssert);
3821
3822 return iemNativeRegAllocTmpForGuestRegCommon(pReNative, poff, enmGstReg, enmIntendedUse, fNoVolatileRegs);
3823}
3824
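/* Hedged sketch of the typical pattern (guest register choice illustrative): fetch a
   guest GPR for updating, emit the modification, then free the temp; with delayed
   writeback the store back to CPUMCTX happens when the dirty shadow is flushed. */
#if 0
    uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_GprFirst,
                                                                 kIemNativeGstRegUse_ForUpdate);
    /* ... emitters modifying idxGstTmpReg ... */
    iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
#endif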
3825
3826#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
3827/**
3828 * Specialized version of iemNativeRegAllocTmpForGuestReg for EFLAGS.
3829 *
3830 * This takes additional arguments for covering liveness assertions in strict
3831 * builds, it's otherwise the same as iemNativeRegAllocTmpForGuestReg() with
3832 * kIemNativeGstReg_EFlags as argument.
3833 */
3834DECL_HIDDEN_THROW(uint8_t)
3835iemNativeRegAllocTmpForGuestEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREGUSE enmIntendedUse,
3836 uint64_t fRead, uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3837{
3838 if (pReNative->idxCurCall != 0 && (fRead || fWrite /*|| fPotentialCall*/))
3839 {
3840 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3841 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3842 Assert(!(fPotentialCall & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3843 uint64_t const fAll = fRead | fWrite /*| fPotentialCall*/;
3844 uint32_t fState;
3845# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
3846 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
3847 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
3848 || ( fRead & RT_BIT_64(a_enmGstEfl) \
3849 ? fWrite & RT_BIT_64(a_enmGstEfl) \
3850 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
3851 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
3852 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
3853 ) \
3854 , ("%s - %u\n", #a_enmGstEfl, fState))
3855 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
3856 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
3857 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
3858 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
3859 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
3860 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
3861 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
3862# undef MY_ASSERT_ONE_EFL
3863 }
3864 RT_NOREF(fPotentialCall);
3865 return iemNativeRegAllocTmpForGuestRegCommon(pReNative, poff, kIemNativeGstReg_EFlags,
3866 enmIntendedUse, false /*fNoVolatileRegs*/);
3867}
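/* Hedged example (masks illustrative): an ADC-style operation reads CF and writes all
   of the status flags, so both masks are supplied for the strict-build liveness checks. */
# if 0
    uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ForUpdate,
                                                                 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) /*fRead*/,
                                                                   RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF)
                                                                 | RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_PF)
                                                                 | RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_AF)
                                                                 | RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF)
                                                                 | RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_SF)
                                                                 | RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OF) /*fWrite*/);
# endif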
3868#endif
3869
3870
3871
3872/**
3873 * Common worker for iemNativeRegAllocTmpForGuestRegIfAlreadyPresent and
3874 * iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent.
3875 *
3876 * See iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() for details.
3877 */
3878DECL_FORCE_INLINE(uint8_t)
3879iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3880{
3881 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3882
3883 /*
3884 * First check if the guest register value is already in a host register.
3885 */
3886 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3887 {
3888 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3889 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3890 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3891 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3892
3893 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3894 {
3895 /*
3896 * We only do readonly use here, so easy compared to the other
3897 * variant of this code.
3898 */
3899 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3900 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3901 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3902 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3903 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3904
3905#ifdef VBOX_STRICT
3906 /* Strict builds: Check that the value is correct. */
3907 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3908#else
3909 RT_NOREF(poff);
3910#endif
3911 return idxReg;
3912 }
3913 }
3914
3915 return UINT8_MAX;
3916}
3917
3918
3919/**
3920 * Allocates a temporary host general purpose register that already holds the
3921 * given guest register value.
3922 *
3923 * The use case for this function is code paths where the shadowing state cannot be
3924 * modified due to branching and such. This will fail if we don't have a
3925 * current shadow copy handy or if it's incompatible. The only code that will
3926 * be emitted here is value checking code in strict builds.
3927 *
3928 * The intended use can only be readonly!
3929 *
3930 * @returns The host register number, UINT8_MAX if not present.
3931 * @param pReNative The native recompile state.
3932 * @param poff Pointer to the instruction buffer offset.
3933 * Will be updated in strict builds if a register is
3934 * found.
3935 * @param enmGstReg The guest register whose shadow copy is wanted (read-only).
3936 * @note In strict builds, this may throw instruction buffer growth failures.
3937 * Non-strict builds will not throw anything.
3938 * @sa iemNativeRegAllocTmpForGuestReg
3939 */
3940DECL_HIDDEN_THROW(uint8_t)
3941iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3942{
3943#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3944 AssertMsg( pReNative->idxCurCall == 0
3945 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3946 || enmGstReg == kIemNativeGstReg_Pc
3947 , ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3948#endif
3949 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, enmGstReg);
3950}
3951
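/* Hedged example: in a code path where no loads may be emitted (e.g. when the shadowing
   state must stay untouched across a branch), use the cached PC copy only if one is
   already around and fall back to some other strategy otherwise. */
#if 0
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* ... read-only uses of idxPcReg ... */
        iemNativeRegFreeTmp(pReNative, idxPcReg);
    }
#endif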
3952
3953#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
3954/**
3955 * Specialized version of iemNativeRegAllocTmpForGuestRegIfAlreadyPresent for
3956 * EFLAGS.
3957 *
3958 * This takes additional arguments for covering liveness assertions in strict
3959 * builds, it's otherwise the same as
3960 * iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() with
3961 * kIemNativeGstReg_EFlags as argument.
3962 *
3963 * @note The @a fWrite parameter is necessary to complete the liveness picture,
3964 * as iemNativeEmitFetchEFlags() may fetch flags in prep for a later
3965 * commit. If the operation clobbers all the flags, @a fRead will be
3966 * zero, so better verify the whole picture while we're here.
3967 */
3968DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
3969 uint64_t fRead, uint64_t fWrite /*=0*/)
3970{
3971 if (pReNative->idxCurCall != 0)
3972 {
3973 Assert(fRead | fWrite);
3974 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3975 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3976 uint64_t const fAll = fRead | fWrite;
3977 uint32_t fState;
3978# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
3979 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
3980 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
3981 || ( fRead & RT_BIT_64(a_enmGstEfl) \
3982 ? fWrite & RT_BIT_64(a_enmGstEfl) \
3983 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
3984 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
3985 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
3986 ) \
3987 , ("%s - %u\n", #a_enmGstEfl, fState))
3988 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
3989 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
3990 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
3991 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
3992 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
3993 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
3994 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
3995# undef MY_ASSERT_ONE_EFL
3996 }
3997 RT_NOREF(fRead);
3998 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, kIemNativeGstReg_EFlags);
3999}
4000#endif
4001
4002
4003/**
4004 * Allocates argument registers for a function call.
4005 *
4006 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4007 * need to check the return value.
4008 * @param pReNative The native recompile state.
4009 * @param off The current code buffer offset.
4010 * @param cArgs The number of arguments the function call takes.
4011 */
4012DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4013{
4014 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4015 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4016 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4017 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4018
4019 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4020 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4021 else if (cArgs == 0)
4022 return off; /* no arguments, nothing to do */
4023
4024 /*
4025 * Are we in luck and all registers are free and not shadowing anything?
4026 */
4027 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4028 for (uint32_t i = 0; i < cArgs; i++)
4029 {
4030 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4031 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4032 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4033 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4034 }
4035 /*
4036 * Okay, not lucky so we have to free up the registers.
4037 */
4038 else
4039 for (uint32_t i = 0; i < cArgs; i++)
4040 {
4041 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4042 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4043 {
4044 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4045 {
4046 case kIemNativeWhat_Var:
4047 {
4048 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4049 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4050 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4051 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4052 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4053#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4054 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4055#endif
4056
4057 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4058 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4059 else
4060 {
4061 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4062 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4063 }
4064 break;
4065 }
4066
4067 case kIemNativeWhat_Tmp:
4068 case kIemNativeWhat_Arg:
4069 case kIemNativeWhat_rc:
4070 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4071 default:
4072 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4073 }
4074
4075 }
4076 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4077 {
4078 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4079 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4080 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4081#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4082 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4083#endif
4084 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4085 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4086 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4087 }
4088 else
4089 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4090 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4091 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4092 }
4093 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4094 return off;
4095}
4096
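/* Hedged sketch (argument count illustrative): free up the first couple of call
   argument registers before loading them and emitting a helper call. */
#if 0
    off = iemNativeRegAllocArgs(pReNative, off, 2 /*cArgs*/);
    /* ... load the argument registers and emit the call ... */
#endif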
4097
4098DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4099
4100
4101#if 0
4102/**
4103 * Frees a register assignment of any type.
4104 *
4105 * @param pReNative The native recompile state.
4106 * @param idxHstReg The register to free.
4107 *
4108 * @note Does not update variables.
4109 */
4110DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4111{
4112 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4113 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4114 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4115 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4116 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4117 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4118 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4119 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4120 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4121 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4122 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4123 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4124 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4125 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4126
4127 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4128 /* no flushing, right:
4129 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4130 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4131 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4132 */
4133}
4134#endif
4135
4136
4137/**
4138 * Frees a temporary register.
4139 *
4140 * Any shadow copies of guest registers assigned to the host register will not
4141 * be flushed by this operation.
4142 */
4143DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4144{
4145 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4146 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4147 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4148 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4149 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4150}
4151
4152
4153/**
4154 * Frees a temporary immediate register.
4155 *
4156 * It is assumed that the caller has not modified the register, so it still holds
4157 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4158 */
4159DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4160{
4161 iemNativeRegFreeTmp(pReNative, idxHstReg);
4162}
4163
4164
4165/**
4166 * Frees a register assigned to a variable.
4167 *
4168 * The register will be disassociated from the variable.
4169 */
4170DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4171{
4172 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4173 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4174 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4175 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4176 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4177#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4178 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4179#endif
4180
4181 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4182 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4183 if (!fFlushShadows)
4184 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4185 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4186 else
4187 {
4188 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4189 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4190#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4191 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
4192#endif
4193 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4194 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4195 uint64_t fGstRegShadows = fGstRegShadowsOld;
4196 while (fGstRegShadows)
4197 {
4198 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4199 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4200
4201 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4202 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4203 }
4204 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4205 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4206 }
4207}
4208
4209
4210#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4211# if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
4212/** Host CPU SIMD register names. */
4213DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4214{
4215# ifdef RT_ARCH_AMD64
4216 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4217# elif RT_ARCH_ARM64
4218 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4219 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4220# else
4221# error "port me"
4222# endif
4223};
4224# endif
4225
4226
4227/**
4228 * Frees a SIMD register assigned to a variable.
4229 *
4230 * The register will be disassociated from the variable.
4231 */
4232DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4233{
4234 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4235 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4236 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4237 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4238 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4239 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4240
4241 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4242 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4243 if (!fFlushShadows)
4244 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4245 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4246 else
4247 {
4248 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4249 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4250 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4251 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4252 uint64_t fGstRegShadows = fGstRegShadowsOld;
4253 while (fGstRegShadows)
4254 {
4255 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4256 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4257
4258 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4259 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4260 }
4261 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4262 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4263 }
4264}
4265
4266
4267/**
4268 * Reassigns a variable to a different SIMD register specified by the caller.
4269 *
4270 * @returns The new code buffer position.
4271 * @param pReNative The native recompile state.
4272 * @param off The current code buffer position.
4273 * @param idxVar The variable index.
4274 * @param idxRegOld The old host register number.
4275 * @param idxRegNew The new host register number.
4276 * @param pszCaller The caller for logging.
4277 */
4278static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4279 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4280{
4281 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4282 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4283 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4284 RT_NOREF(pszCaller);
4285
4286 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4287 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4288 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4289
4290 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4291 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4292 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4293
4294 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4295 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4297
4298 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4299 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4300 else
4301 {
4302 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4303 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4304 }
4305
4306 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4307 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4308 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
4309 if (fGstRegShadows)
4310 {
4311 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4312 | RT_BIT_32(idxRegNew);
4313 while (fGstRegShadows)
4314 {
4315 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4316 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4317
4318 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4319 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4320 }
4321 }
4322
4323 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4324 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4325 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4326 return off;
4327}
4328
4329
4330/**
4331 * Moves a variable to a different register or spills it onto the stack.
4332 *
4333 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4334 * kinds can easily be recreated if needed later.
4335 *
4336 * @returns The new code buffer position.
4337 * @param pReNative The native recompile state.
4338 * @param off The current code buffer position.
4339 * @param idxVar The variable index.
4340 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4341 * call-volatile registers.
4342 */
4343DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4344 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4345{
4346 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4347 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4348 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4349 Assert(!pVar->fRegAcquired);
4350 Assert(pVar->fSimdReg);
4351
4352 uint8_t const idxRegOld = pVar->idxReg;
4353 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4354 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4355 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4356 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4357 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4358 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4359 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4360 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4361 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4362 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4363
4364 /** @todo Add statistics on this.*/
4365 /** @todo Implement basic variable liveness analysis (python) so variables
4366 * can be freed immediately once no longer used. Without it we keep
4367 * trashing registers and stack space on dead variables.
4368 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4369
4370 /*
4371 * First try move it to a different register, as that's cheaper.
4372 */
4373 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4374 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4375 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4376 if (fRegs)
4377 {
4378 /* Avoid using shadow registers, if possible. */
4379 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4380 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4381 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4382 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4383 }
4384
4385 /*
4386 * Otherwise we must spill the register onto the stack.
4387 */
4388 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4389 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4390 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4391
4392 if (pVar->cbVar == sizeof(RTUINT128U))
4393 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4394 else
4395 {
4396 Assert(pVar->cbVar == sizeof(RTUINT256U));
4397 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4398 }
4399
4400 pVar->idxReg = UINT8_MAX;
4401 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4402 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4403 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4404 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4405 return off;
4406}
4407
4408
4409/**
4410 * Called right before emitting a call instruction to move anything important
4411 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
4412 * optionally freeing argument variables.
4413 *
4414 * @returns New code buffer offset, UINT32_MAX on failure.
4415 * @param pReNative The native recompile state.
4416 * @param off The code buffer offset.
4417 * @param cArgs The number of arguments the function call takes.
4418 * It is presumed that the host register part of these has
4419 * been allocated as such already and won't need moving,
4420 * just freeing.
4421 * @param fKeepVars Mask of variables that should keep their register
4422 * assignments. Caller must take care to handle these.
4423 */
4424DECL_HIDDEN_THROW(uint32_t)
4425iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4426{
4427 Assert(!cArgs); RT_NOREF(cArgs);
4428
4429 /* fKeepVars will reduce this mask. */
4430 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4431
4432 /*
4433 * Move anything important out of volatile registers.
4434 */
4435 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4436#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4437 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4438#endif
4439 ;
4440
4441 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4442 if (!fSimdRegsToMove)
4443 { /* likely */ }
4444 else
4445 {
4446 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4447 while (fSimdRegsToMove != 0)
4448 {
4449 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4450 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4451
4452 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4453 {
4454 case kIemNativeWhat_Var:
4455 {
4456 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4457 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4458 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4459 Assert(pVar->idxReg == idxSimdReg);
4460 Assert(pVar->fSimdReg);
4461 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4462 {
4463 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4464 idxVar, pVar->enmKind, pVar->idxReg));
4465 if (pVar->enmKind != kIemNativeVarKind_Stack)
4466 pVar->idxReg = UINT8_MAX;
4467 else
4468 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4469 }
4470 else
4471 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4472 continue;
4473 }
4474
4475 case kIemNativeWhat_Arg:
4476 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4477 continue;
4478
4479 case kIemNativeWhat_rc:
4480 case kIemNativeWhat_Tmp:
4481 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4482 continue;
4483
4484 case kIemNativeWhat_FixedReserved:
4485#ifdef RT_ARCH_ARM64
4486 continue; /* On ARM the upper half of the virtual 256-bit register. */
4487#endif
4488
4489 case kIemNativeWhat_FixedTmp:
4490 case kIemNativeWhat_pVCpuFixed:
4491 case kIemNativeWhat_pCtxFixed:
4492 case kIemNativeWhat_PcShadow:
4493 case kIemNativeWhat_Invalid:
4494 case kIemNativeWhat_End:
4495 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4496 }
4497 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4498 }
4499 }
4500
4501 /*
4502 * Do the actual freeing.
4503 */
4504 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4505 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4506 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4507 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4508
4509 /* If there are guest register shadows in any call-volatile register, we
4510 have to clear the corresponding guest register masks for each register. */
4511 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4512 if (fHstSimdRegsWithGstShadow)
4513 {
4514 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4515 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4516 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4517 do
4518 {
4519 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4520 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4521
4522 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4523
4524#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4525 /*
4526 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4527 * to call volatile registers).
4528 */
4529 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4530 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4531 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4532#endif
4533 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4534 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4535
4536 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4537 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4538 } while (fHstSimdRegsWithGstShadow != 0);
4539 }
4540
4541 return off;
4542}
4543#endif
4544
4545
4546/**
4547 * Called right before emitting a call instruction to move anything important
4548 * out of call-volatile registers, free and flush the call-volatile registers,
4549 * optionally freeing argument variables.
4550 *
4551 * @returns New code buffer offset, UINT32_MAX on failure.
4552 * @param pReNative The native recompile state.
4553 * @param off The code buffer offset.
4554 * @param cArgs The number of arguments the function call takes.
4555 * It is presumed that the host register part of these has
4556 * been allocated as such already and won't need moving,
4557 * just freeing.
4558 * @param fKeepVars Mask of variables that should keep their register
4559 * assignments. Caller must take care to handle these.
4560 */
4561DECL_HIDDEN_THROW(uint32_t)
4562iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4563{
4564 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4565
4566 /* fKeepVars will reduce this mask. */
4567 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK;
4568
4569#ifdef RT_ARCH_ARM64
4570AssertCompile(IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK == UINT32_C(0x37fff));
4571#endif
4572
4573 /*
4574 * Move anything important out of volatile registers.
4575 */
4576 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4577 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4578 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK
4579#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4580 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4581#endif
4582 & ~g_afIemNativeCallRegs[cArgs];
4583
4584 fRegsToMove &= pReNative->Core.bmHstRegs;
4585 if (!fRegsToMove)
4586 { /* likely */ }
4587 else
4588 {
4589 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4590 while (fRegsToMove != 0)
4591 {
4592 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4593 fRegsToMove &= ~RT_BIT_32(idxReg);
4594
4595 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4596 {
4597 case kIemNativeWhat_Var:
4598 {
4599 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4600 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4601 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4602 Assert(pVar->idxReg == idxReg);
4603#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4604 Assert(!pVar->fSimdReg);
4605#endif
4606 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4607 {
4608 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4609 idxVar, pVar->enmKind, pVar->idxReg));
4610 if (pVar->enmKind != kIemNativeVarKind_Stack)
4611 pVar->idxReg = UINT8_MAX;
4612 else
4613 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4614 }
4615 else
4616 fRegsToFree &= ~RT_BIT_32(idxReg);
4617 continue;
4618 }
4619
4620 case kIemNativeWhat_Arg:
4621 AssertMsgFailed(("What?!?: %u\n", idxReg));
4622 continue;
4623
4624 case kIemNativeWhat_rc:
4625 case kIemNativeWhat_Tmp:
4626 AssertMsgFailed(("Missing free: %u\n", idxReg));
4627 continue;
4628
4629 case kIemNativeWhat_FixedTmp:
4630 case kIemNativeWhat_pVCpuFixed:
4631 case kIemNativeWhat_pCtxFixed:
4632 case kIemNativeWhat_PcShadow:
4633 case kIemNativeWhat_FixedReserved:
4634 case kIemNativeWhat_Invalid:
4635 case kIemNativeWhat_End:
4636 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4637 }
4638 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4639 }
4640 }
4641
4642 /*
4643 * Do the actual freeing.
4644 */
4645 if (pReNative->Core.bmHstRegs & fRegsToFree)
4646 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4647 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4648 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4649
4650 /* If there are guest register shadows in any call-volatile register, we
4651 have to clear the corresponding guest register masks for each register. */
4652 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4653 if (fHstRegsWithGstShadow)
4654 {
4655 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4656 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
4657 fHstRegsWithGstShadow));
4658 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4659 do
4660 {
4661 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4662 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4663
4664 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4665
4666#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4667 /*
4668 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4669 * to call volatile registers).
4670 */
4671 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4672 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4673 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4674#endif
4675
4676 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4677 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4678 } while (fHstRegsWithGstShadow != 0);
4679 }
4680
4681#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4682 /* Now for the SIMD registers, no argument support for now. */
4683 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4684#endif
4685
4686 return off;
4687}
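

/*
 * Illustrative sketch only (not part of the recompiler): a typical call site
 * pairs the helper above with a call emitter and with
 * iemNativeRegRestoreGuestShadowsInVolatileRegs() defined further down.  The
 * call emitter name iemNativeEmitCallImm() and the helper pointer pfnCImpl are
 * assumed/made-up names here; two arguments and no kept variables are used:
 *
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2, 0);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
 */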
4688
4689
4690/**
4691 * Flushes a set of guest register shadow copies.
4692 *
4693 * This is usually done after calling a threaded function or a C-implementation
4694 * of an instruction.
4695 *
4696 * @param pReNative The native recompile state.
4697 * @param fGstRegs Set of guest registers to flush.
4698 */
4699DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4700{
4701 /*
4702 * Reduce the mask by what's currently shadowed
4703 */
4704 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4705 fGstRegs &= bmGstRegShadowsOld;
4706 if (fGstRegs)
4707 {
4708 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4709 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4710 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4711 if (bmGstRegShadowsNew)
4712 {
4713 /*
4714 * Partial.
4715 */
4716 do
4717 {
4718 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4719 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4720 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4721 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4722 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4723#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4724 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4725#endif
4726
4727 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4728 fGstRegs &= ~fInThisHstReg;
4729 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4730 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4731 if (!fGstRegShadowsNew)
4732 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4733 } while (fGstRegs != 0);
4734 }
4735 else
4736 {
4737 /*
4738 * Clear all.
4739 */
4740 do
4741 {
4742 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4743 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4744 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4745 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4746 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4747#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4748 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4749#endif
4750
4751 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4752 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4753 } while (fGstRegs != 0);
4754 pReNative->Core.bmHstRegsWithGstShadow = 0;
4755 }
4756 }
4757}
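

/*
 * Worked example (illustrative numbers only) for the partial flush path above:
 * assume host register 2 shadows guest registers 0 and 3, i.e. its
 * fGstRegShadows is 0x9, and the caller asks to flush guest register 0 only,
 * i.e. fGstRegs is 0x1.  Then:
 *
 *      fInThisHstReg     = (0x9 & 0x1) | RT_BIT_64(0) = 0x1
 *      fGstRegShadowsNew =  0x9 & ~0x1                = 0x8
 *
 * So host register 2 keeps shadowing guest register 3 and stays in
 * bmHstRegsWithGstShadow, while bit 0 was already cleared from
 * bmGstRegShadows by the initial mask update at the top of the function.
 */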
4758
4759
4760/**
4761 * Flushes guest register shadow copies held by a set of host registers.
4762 *
4763 * This is used with the TLB lookup code for ensuring that we don't carry on
4764 * with any guest shadows in volatile registers, as these will get corrupted by
4765 * a TLB miss.
4766 *
4767 * @param pReNative The native recompile state.
4768 * @param fHstRegs Set of host registers to flush guest shadows for.
4769 */
4770DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4771{
4772 /*
4773 * Reduce the mask by what's currently shadowed.
4774 */
4775 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4776 fHstRegs &= bmHstRegsWithGstShadowOld;
4777 if (fHstRegs)
4778 {
4779 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4780 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4781 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4782 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4783 if (bmHstRegsWithGstShadowNew)
4784 {
4785 /*
4786 * Partial (likely).
4787 */
4788 uint64_t fGstShadows = 0;
4789 do
4790 {
4791 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4792 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4793 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4794 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4795#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4796 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4797#endif
4798
4799 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4800 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4801 fHstRegs &= ~RT_BIT_32(idxHstReg);
4802 } while (fHstRegs != 0);
4803 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4804 }
4805 else
4806 {
4807 /*
4808 * Clear all.
4809 */
4810 do
4811 {
4812 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4813 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4814 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4815 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4816#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4817 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4818#endif
4819
4820 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4821 fHstRegs &= ~RT_BIT_32(idxHstReg);
4822 } while (fHstRegs != 0);
4823 pReNative->Core.bmGstRegShadows = 0;
4824 }
4825 }
4826}
4827
4828
4829/**
4830 * Restores guest shadow copies in volatile registers.
4831 *
4832 * This is used after calling a helper function (think TLB miss) to restore the
4833 * register state of volatile registers.
4834 *
4835 * @param pReNative The native recompile state.
4836 * @param off The code buffer offset.
4837 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4838 * be active (allocated) w/o asserting. Hack.
4839 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4840 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4841 */
4842DECL_HIDDEN_THROW(uint32_t)
4843iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4844{
4845 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4846 if (fHstRegs)
4847 {
4848 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4849 do
4850 {
4851 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4852
4853 /* It's not fatal if a register is active holding a variable that
4854 shadows a guest register, ASSUMING all pending guest register
4855 writes were flushed prior to the helper call. However, we'll be
4856 emitting duplicate restores, so it wastes code space. */
4857 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4858 RT_NOREF(fHstRegsActiveShadows);
4859
4860 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4861#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4862 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4863#endif
4864 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4865 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4866 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4867
4868 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4869 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4870
4871 fHstRegs &= ~RT_BIT_32(idxHstReg);
4872 } while (fHstRegs != 0);
4873 }
4874 return off;
4875}
4876
4877
4878
4879
4880/*********************************************************************************************************************************
4881* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4882*********************************************************************************************************************************/
4883#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4884
4885/**
4886 * Info about shadowed guest SIMD register values.
4887 * @see IEMNATIVEGSTSIMDREG
4888 */
4889static struct
4890{
4891 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4892 uint32_t offXmm;
4893 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4894 uint32_t offYmm;
4895 /** Name (for logging). */
4896 const char *pszName;
4897} const g_aGstSimdShadowInfo[] =
4898{
4899#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4900 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4901 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4902 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4903 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4904 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4905 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4906 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4907 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4908 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4909 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4910 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4911 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4912 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4913 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4914 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4915 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4916 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4917#undef CPUMCTX_OFF_AND_SIZE
4918};
4919AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
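

/*
 * Illustrative only: the table above is consumed by the VCpu load/store
 * emitters; e.g. flushing the low 128 bits of guest ymm3 boils down to
 * something like the following (this is what iemNativeSimdRegFlushPendingWrite()
 * below does, with idxHstSimdReg being the shadowing host register):
 *
 *      off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg,
 *                                                      g_aGstSimdShadowInfo[kIemNativeGstSimdReg_SimdRegFirst + 3].offXmm);
 */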
4920
4921
4922/**
4923 * Frees a temporary SIMD register.
4924 *
4925 * Any shadow copies of guest registers assigned to the host register will not
4926 * be flushed by this operation.
4927 */
4928DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4929{
4930 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4931 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4932 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4933 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4934 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4935}
4936
4937
4938/**
4939 * Emits code to flush a pending write of the given SIMD register if any, and also flushes the guest to host SIMD register association.
4940 *
4941 * @returns New code buffer offset.
4942 * @param pReNative The native recompile state.
4943 * @param off Current code buffer position.
4944 * @param enmGstSimdReg The guest SIMD register to flush.
4945 */
4946DECL_HIDDEN_THROW(uint32_t)
4947iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4948{
4949 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4950
4951 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4952 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4953 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4954 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4955
4956 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4957 {
4958 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4959 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4960 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4961 }
4962
4963 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4964 {
4965 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4966 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4967 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4968 }
4969
4970 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4971 return off;
4972}
4973
4974
4975/**
4976 * Flush the given set of guest SIMD registers if marked as dirty.
4977 *
4978 * @returns New code buffer offset.
4979 * @param pReNative The native recompile state.
4980 * @param off Current code buffer position.
4981 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
4982 */
4983DECL_HIDDEN_THROW(uint32_t)
4984iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4985{
4986 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4987 & fFlushGstSimdReg;
4988 if (bmGstSimdRegShadowDirty)
4989 {
4990# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4991 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4992 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4993# endif
4994
4995 do
4996 {
4997 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4998 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4999 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5000 } while (bmGstSimdRegShadowDirty);
5001 }
5002
5003 return off;
5004}
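

/*
 * Illustrative only: flushing a single dirty guest SIMD register (ymm1 in this
 * made-up example) before accessing its CPUMCTX copy directly:
 *
 *      off = iemNativeSimdRegFlushDirtyGuest(pReNative, off,
 *                                            RT_BIT_64(kIemNativeGstSimdReg_SimdRegFirst + 1));
 */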
5005
5006
5007#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5008/**
5009 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
5010 *
5011 * @returns New code buffer offset.
5012 * @param pReNative The native recompile state.
5013 * @param off Current code buffer position.
5014 * @param idxHstSimdReg The host SIMD register.
5015 *
5016 * @note This doesn't do any unshadowing of guest registers from the host register.
5017 */
5018DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
5019{
5020 /* We need to flush any pending guest register writes this host register shadows. */
5021 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5022 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5023 if (bmGstSimdRegShadowDirty)
5024 {
5025# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5026 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5027 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5028# endif
5029
5030 do
5031 {
5032 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5033 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5034 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5035 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5036 } while (bmGstSimdRegShadowDirty);
5037 }
5038
5039 return off;
5040}
5041#endif
5042
5043
5044/**
5045 * Locate a register, possibly freeing one up.
5046 *
5047 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5048 * failed.
5049 *
5050 * @returns Host register number on success. Returns UINT8_MAX if no registers
5051 * were found; the caller is supposed to deal with this and raise an
5052 * allocation type specific status code (if desired).
5053 *
5054 * @throws VBox status code if we run into trouble spilling a variable or
5055 * recording debug info. Does NOT throw anything if we're out of
5056 * registers, though.
5057 */
5058static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5059 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5060{
5061 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5062 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5063 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5064
5065 /*
5066 * Try a freed register that's shadowing a guest register.
5067 */
5068 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5069 if (fRegs)
5070 {
5071 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5072
5073#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5074 /*
5075 * When we have liveness information, we use it to kick out all shadowed
5076 * guest registers that will not be needed any more in this TB. If we're
5077 * lucky, this may prevent us from ending up here again.
5078 *
5079 * Note! We must consider the previous entry here so we don't free
5080 * anything that the current threaded function requires (current
5081 * entry is produced by the next threaded function).
5082 */
5083 uint32_t const idxCurCall = pReNative->idxCurCall;
5084 if (idxCurCall > 0)
5085 {
5086 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5087 uint64_t const fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
5088
5089 /* If it matches any shadowed registers. */
5090 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5091 {
5092 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5093 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5094 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5095
5096 /* See if we've got any unshadowed registers we can return now. */
5097 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5098 if (fUnshadowedRegs)
5099 {
5100 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
5101 return (fPreferVolatile
5102 ? ASMBitFirstSetU32(fUnshadowedRegs)
5103 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5104 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5105 - 1;
5106 }
5107 }
5108 }
5109#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5110
5111 unsigned const idxReg = (fPreferVolatile
5112 ? ASMBitFirstSetU32(fRegs)
5113 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5114 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5115 - 1;
5116
5117 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5118 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5119 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5120 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5121
5122 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5123 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
5124
5125 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5126 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5127 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5128 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5129 return idxReg;
5130 }
5131
5132 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5133
5134 /*
5135 * Try free up a variable that's in a register.
5136 *
5137 * We do two rounds here, first evacuating variables we don't need to be
5138 * saved on the stack, then in the second round move things to the stack.
5139 */
5140 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5141 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5142 {
5143 uint32_t fVars = pReNative->Core.bmVars;
5144 while (fVars)
5145 {
5146 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5147 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5148 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non-SIMD variables here. */
5149 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Note: clear the bit first or we'd spin on it forever. */
5150
5151 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5152 && (RT_BIT_32(idxReg) & fRegMask)
5153 && ( iLoop == 0
5154 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5155 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5156 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5157 {
5158 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5159 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5160 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5161 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5162 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5163 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5164
5165 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5166 {
5167 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5168 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5169 }
5170
5171 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5172 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5173
5174 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5175 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5176 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5177 return idxReg;
5178 }
5179 fVars &= ~RT_BIT_32(idxVar);
5180 }
5181 }
5182
5183 AssertFailed();
5184 return UINT8_MAX;
5185}
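

/*
 * Worked example (made-up masks) of the fPreferVolatile selection expression
 * used above: say fRegs is 0x00f0 and IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
 * covers 0x000f.  With fPreferVolatile we take ASMBitFirstSetU32(0x00f0) - 1,
 * i.e. register 4; without it we first reduce to fRegs & ~volatile = 0x00f0
 * (non-zero, so it is used as-is) and take ASMBitLastSetU32(0x00f0) - 1,
 * i.e. register 7, the highest acceptable non-volatile register.
 */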
5186
5187
5188/**
5189 * Flushes a set of guest register shadow copies.
5190 *
5191 * This is usually done after calling a threaded function or a C-implementation
5192 * of an instruction.
5193 *
5194 * @param pReNative The native recompile state.
5195 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5196 */
5197DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5198{
5199 /*
5200 * Reduce the mask by what's currently shadowed
5201 */
5202 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5203 fGstSimdRegs &= bmGstSimdRegShadows;
5204 if (fGstSimdRegs)
5205 {
5206 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5207 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5208 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5209 if (bmGstSimdRegShadowsNew)
5210 {
5211 /*
5212 * Partial.
5213 */
5214 do
5215 {
5216 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5217 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5218 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5219 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5220 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5221 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5222
5223 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5224 fGstSimdRegs &= ~fInThisHstReg;
5225 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5226 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5227 if (!fGstRegShadowsNew)
5228 {
5229 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5230 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5231 }
5232 } while (fGstSimdRegs != 0);
5233 }
5234 else
5235 {
5236 /*
5237 * Clear all.
5238 */
5239 do
5240 {
5241 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5242 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5243 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5244 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5245 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5246 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5247
5248 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5249 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5250 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5251 } while (fGstSimdRegs != 0);
5252 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5253 }
5254 }
5255}
5256
5257
5258/**
5259 * Allocates a temporary host SIMD register.
5260 *
5261 * This may emit code to save register content onto the stack in order to free
5262 * up a register.
5263 *
5264 * @returns The host register number; throws VBox status code on failure,
5265 * so no need to check the return value.
5266 * @param pReNative The native recompile state.
5267 * @param poff Pointer to the variable with the code buffer position.
5268 * This will be updated if we need to move a variable from
5269 * register to stack in order to satisfy the request.
5270 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5271 * registers (@c true, default) or the other way around
5272 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5273 */
5274DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5275{
5276 /*
5277 * Try find a completely unused register, preferably a call-volatile one.
5278 */
5279 uint8_t idxSimdReg;
5280 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5281 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5282 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5283 if (fRegs)
5284 {
5285 if (fPreferVolatile)
5286 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5287 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5288 else
5289 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5290 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5291 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5292 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5293
5294 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5295 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5296 }
5297 else
5298 {
5299 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5300 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5301 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5302 }
5303
5304 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5305 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5306}
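

/*
 * Minimal usage sketch (illustrative only): grab a scratch SIMD register, emit
 * whatever is needed with it and release it again.  The emitter named here,
 * iemNativeEmitSimdZeroVecRegU128(), is just an assumed stand-in for the code
 * the caller actually wants to emit:
 *
 *      uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitSimdZeroVecRegU128(pReNative, off, idxSimdTmp);
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
 */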
5307
5308
5309/**
5310 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5311 * registers.
5312 *
5313 * @returns The host register number; throws VBox status code on failure,
5314 * so no need to check the return value.
5315 * @param pReNative The native recompile state.
5316 * @param poff Pointer to the variable with the code buffer position.
5317 * This will be updated if we need to move a variable from
5318 * register to stack in order to satisfy the request.
5319 * @param fRegMask Mask of acceptable registers.
5320 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5321 * registers (@c true, default) or the other way around
5322 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5323 */
5324DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5325 bool fPreferVolatile /*= true*/)
5326{
5327 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5328 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5329
5330 /*
5331 * Try find a completely unused register, preferably a call-volatile one.
5332 */
5333 uint8_t idxSimdReg;
5334 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5335 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5336 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5337 & fRegMask;
5338 if (fRegs)
5339 {
5340 if (fPreferVolatile)
5341 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5342 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5343 else
5344 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5345 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5346 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5347 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5348
5349 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5350 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5351 }
5352 else
5353 {
5354 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5355 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5356 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5357 }
5358
5359 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5360 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5361}
5362
5363
5364/**
5365 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5366 *
5367 * @param pReNative The native recompile state.
5368 * @param idxHstSimdReg The host SIMD register to update the state for.
5369 * @param enmLoadSz The load size to set.
5370 */
5371DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5372 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5373{
5374 /* Everything valid already? -> nothing to do. */
5375 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5376 return;
5377
5378 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5379 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5380 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5381 {
5382 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5383 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5384 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5385 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5386 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5387 }
5388}
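

/*
 * Illustrative summary of the load-size merging done above (Invalid meaning
 * nothing is marked as loaded yet):
 *
 *      current 256      + anything   -> 256   (early return, everything already valid)
 *      current Invalid  + enmLoadSz  -> enmLoadSz
 *      current Low128   + High128    -> 256
 *      current High128  + Low128     -> 256
 *
 * Equal sizes are left untouched; the remaining combinations trip the assertion.
 */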
5389
5390
5391static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5392 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5393{
5394 /* Easy case first: either the destination loads the same range as what the source has already loaded, or the source has loaded everything. */
5395 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5396 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5397 {
5398# ifdef RT_ARCH_ARM64
5399 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5400 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5401# endif
5402
5403 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5404 {
5405 switch (enmLoadSzDst)
5406 {
5407 case kIemNativeGstSimdRegLdStSz_256:
5408 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5409 break;
5410 case kIemNativeGstSimdRegLdStSz_Low128:
5411 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5412 break;
5413 case kIemNativeGstSimdRegLdStSz_High128:
5414 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5415 break;
5416 default:
5417 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5418 }
5419
5420 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5421 }
5422 }
5423 else
5424 {
5425 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5426 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5427 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5428 }
5429
5430 return off;
5431}
5432
5433
5434/**
5435 * Allocates a temporary host SIMD register for keeping a guest
5436 * SIMD register value.
5437 *
5438 * Since we may already have a register holding the guest register value,
5439 * code will be emitted to do the loading if that's not the case. Code may also
5440 * be emitted if we have to free up a register to satisfy the request.
5441 *
5442 * @returns The host register number; throws VBox status code on failure, so no
5443 * need to check the return value.
5444 * @param pReNative The native recompile state.
5445 * @param poff Pointer to the variable with the code buffer
5446 * position. This will be updated if we need to move a
5447 * variable from register to stack in order to satisfy
5448 * the request.
5449 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5450 * @param enmIntendedUse How the caller will be using the host register.
5451 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5452 * register is okay (default). The ASSUMPTION here is
5453 * that the caller has already flushed all volatile
5454 * registers, so this is only applied if we allocate a
5455 * new register.
5456 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5457 */
5458DECL_HIDDEN_THROW(uint8_t)
5459iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5460 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5461 bool fNoVolatileRegs /*= false*/)
5462{
5463 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5464#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5465 AssertMsg( pReNative->idxCurCall == 0
5466 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5467 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5468 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5469 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5470 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5471 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5472#endif
5473#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5474 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5475#endif
5476 uint32_t const fRegMask = !fNoVolatileRegs
5477 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5478 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5479
5480 /*
5481 * First check if the guest register value is already in a host register.
5482 */
5483 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5484 {
5485 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5486 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5487 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5488 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5489
5490 /* It's not supposed to be allocated... */
5491 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5492 {
5493 /*
5494 * If the register will trash the guest shadow copy, try find a
5495 * completely unused register we can use instead. If that fails,
5496 * we need to disassociate the host reg from the guest reg.
5497 */
5498 /** @todo would be nice to know if preserving the register is in any way helpful. */
5499 /* If the purpose is calculations, try duplicate the register value as
5500 we'll be clobbering the shadow. */
5501 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5502 && ( ~pReNative->Core.bmHstSimdRegs
5503 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5504 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5505 {
5506 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5507
5508 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5509
5510 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5511 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5512 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5513 idxSimdReg = idxRegNew;
5514 }
5515 /* If the current register matches the restrictions, go ahead and allocate
5516 it for the caller. */
5517 else if (fRegMask & RT_BIT_32(idxSimdReg))
5518 {
5519 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5520 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5521 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5522 {
5523 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5524 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5525 else
5526 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5527 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5528 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5529 }
5530 else
5531 {
5532 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5533 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5534 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5535 }
5536 }
5537 /* Otherwise, allocate a register that satisfies the caller and transfer
5538 the shadowing if compatible with the intended use. (This basically
5539 means the call wants a non-volatile register (RSP push/pop scenario).) */
5540 else
5541 {
5542 Assert(fNoVolatileRegs);
5543 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5544 !fNoVolatileRegs
5545 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5546 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5547 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5548 {
5549 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5550 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5551 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5552 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5553 }
5554 else
5555 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5556 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5557 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5558 idxSimdReg = idxRegNew;
5559 }
5560 }
5561 else
5562 {
5563 /*
5564 * Oops. Shadowed guest register already allocated!
5565 *
5566 * Allocate a new register, copy the value and, if updating, the
5567 * guest shadow copy assignment to the new register.
5568 */
5569 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5570 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5571 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5572 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5573
5574 /** @todo share register for readonly access. */
5575 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5576 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5577
5578 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5579 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5580 else
5581 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5582
5583 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5584 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5585 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5586 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5587 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5588 else
5589 {
5590 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5591 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5592 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5593 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5594 }
5595 idxSimdReg = idxRegNew;
5596 }
5597 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5598
5599#ifdef VBOX_STRICT
5600 /* Strict builds: Check that the value is correct. */
5601 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5602 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5603#endif
5604
5605 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5606 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5607 {
5608# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5609 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5610 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5611# endif
5612
5613 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5614 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5615 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5616 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5617 else
5618 {
5619 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5620 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5621 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5622 }
5623 }
5624
5625 return idxSimdReg;
5626 }
5627
5628 /*
5629 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5630 */
5631 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5632
5633 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5634 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5635 else
5636 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5637
5638 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5639 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5640
5641 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5642 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5643 {
5644# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5645 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5646 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5647# endif
5648
5649 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5650 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5651 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5652 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5653 else
5654 {
5655 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5656 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5657 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5658 }
5659 }
5660
5661 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5662 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5663
5664 return idxRegNew;
5665}
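/*
 * Illustrative sketch (not a call site copied from this file; the parameter order and
 * the iemNativeSimdRegFreeTmp name are assumptions inferred from the log statements
 * above and the GPR allocator naming elsewhere in this file): a caller wanting to
 * update the low 128 bits of a guest SIMD register would do something along these lines:
 *
 *     uint8_t const idxHstSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, enmGstSimdReg,
 *                                                                           kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                           kIemNativeGstRegUse_ForUpdate);
 *     ... emit code modifying idxHstSimdReg ...
 *     iemNativeSimdRegFreeTmp(pReNative, idxHstSimdReg);
 *
 * Only the enums used inside the function above are confirmed here.
 */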
5666
5667
5668/**
5669 * Flushes guest SIMD register shadow copies held by a set of host registers.
5670 *
5671 * This is used whenever calling an external helper for ensuring that we don't carry on
5672 * with any guest shadows in volatile registers, as these will get corrupted by the caller.
5673 *
5674 * @param pReNative The native recompile state.
5675 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5676 */
5677DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5678{
5679 /*
5680 * Reduce the mask by what's currently shadowed.
5681 */
5682 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5683 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5684 if (fHstSimdRegs)
5685 {
5686 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5687 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5688 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5689 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5690 if (bmHstSimdRegsWithGstShadowNew)
5691 {
5692 /*
5693 * Partial (likely).
5694 */
5695 uint64_t fGstShadows = 0;
5696 do
5697 {
5698 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5699 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5700 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5701 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5702 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5703 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5704
5705 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5706 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5707 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5708 } while (fHstSimdRegs != 0);
5709 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5710 }
5711 else
5712 {
5713 /*
5714 * Clear all.
5715 */
5716 do
5717 {
5718 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5719 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5720 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5721 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5722 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5723 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5724
5725 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5726 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5727 } while (fHstSimdRegs != 0);
5728 pReNative->Core.bmGstSimdRegShadows = 0;
5729 }
5730 }
5731}
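/*
 * Illustrative sketch (an assumption about typical usage, not an actual call site in
 * this section): before emitting a call to an external helper, the guest shadows held
 * in volatile SIMD registers would be flushed roughly like this:
 *
 *     iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 */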
5732#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5733
5734
5735
5736/*********************************************************************************************************************************
5737* Code emitters for flushing pending guest register writes and sanity checks *
5738*********************************************************************************************************************************/
5739
5740#ifdef VBOX_STRICT
5741/**
5742 * Does internal register allocator sanity checks.
5743 */
5744DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5745{
5746 /*
5747 * Iterate host registers building a guest shadowing set.
5748 */
5749 uint64_t bmGstRegShadows = 0;
5750 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5751 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5752 while (bmHstRegsWithGstShadow)
5753 {
5754 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5755 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5756 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5757
5758 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5759 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5760 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5761 bmGstRegShadows |= fThisGstRegShadows;
5762 while (fThisGstRegShadows)
5763 {
5764 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5765 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5766 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5767 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5768 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5769 }
5770 }
5771 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5772 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5773 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5774
5775 /*
5776 * Now the other way around, checking the guest to host index array.
5777 */
5778 bmHstRegsWithGstShadow = 0;
5779 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5780 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5781 while (bmGstRegShadows)
5782 {
5783 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5784 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5785 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5786
5787 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5788 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5789 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5790 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5791 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5792 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5793 }
5794 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5795 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5796 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5797}
5798#endif /* VBOX_STRICT */
5799
5800
5801/**
5802 * Flushes any delayed guest register writes.
5803 *
5804 * This must be called prior to calling CImpl functions and any helpers that use
5805 * the guest state (like raising exceptions) and such.
5806 *
5807 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5808 * the caller if it wishes to do so.
5809 */
5810DECL_HIDDEN_THROW(uint32_t)
5811iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5812{
5813#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5814 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5815 off = iemNativeEmitPcWriteback(pReNative, off);
5816#else
5817 RT_NOREF(pReNative, fGstShwExcept);
5818#endif
5819
5820#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5821 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5822#endif
5823
5824#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5825 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5826#endif
5827
5828 return off;
5829}
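/*
 * Note / sketch: this slow path is normally reached through the
 * iemNativeRegFlushPendingWrites wrapper used by the call emitters further down, e.g.:
 *
 *     off = iemNativeRegFlushPendingWrites(pReNative, off);
 *
 * That wrapper is defined elsewhere and is assumed to forward here only when there is
 * actually something pending to write back.
 */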
5830
5831#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5832
5833# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5834
5835/**
5836 * Checks if the value in @a idxPcReg matches IEMCPU::uPcUpdatingDebug.
5837 */
5838DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheckWithReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxPcReg)
5839{
5840 Assert(idxPcReg != IEMNATIVE_REG_FIXED_TMP0);
5841 Assert(pReNative->Core.fDebugPcInitialized);
5842
5843 /* cmp [pVCpu->iem.s.uPcUpdatingDebug], pcreg */
5844# ifdef RT_ARCH_AMD64
5845 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5846 pCodeBuf[off++] = X86_OP_REX_W | (idxPcReg >= 8 ? X86_OP_REX_R : 0);
5847 pCodeBuf[off++] = 0x3b;
5848 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxPcReg & 7, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5849# else
5850 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5851 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5852 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, idxPcReg);
5853# endif
5854
5855 uint32_t offFixup = off;
5856 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, kIemNativeInstrCond_e);
5857 off = iemNativeEmitBrkEx(pCodeBuf, off, UINT32_C(0x2200));
5858 iemNativeFixupFixedJump(pReNative, offFixup, off);
5859
5860 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5861 return off;
5862}
5863
5864
5865/**
5866 * Checks that the current RIP+offPc matches IEMCPU::uPcUpdatingDebug.
5867 */
5868DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5869{
5870 if (pReNative->Core.fDebugPcInitialized)
5871 {
5872 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc);
5873 if (pReNative->Core.offPc)
5874 {
5875 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5876 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 8);
5877 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxTmpReg, idxPcReg, pReNative->Core.offPc);
5878 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5879 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxTmpReg);
5880 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5881 }
5882 else
5883 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
5884 iemNativeRegFreeTmp(pReNative, idxPcReg);
5885 }
5886 return off;
5887}
5888
5889# endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG */
5890
5891/**
5892 * Emits code to update the guest RIP value by adding the instruction offset accumulated since the last RIP update.
5893 */
5894DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5895{
5896 Assert(pReNative->Core.offPc);
5897# if !defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && !defined(VBOX_WITH_STATISTICS)
5898 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x\n", pReNative->Core.offPc, off));
5899# else
5900 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
5901 uint8_t idxCurCall = pReNative->idxCurCall;
5902 uint8_t idxInstr = pReNative->pTbOrg->Thrd.paCalls[idxCurCall].idxInstr; /* unreliable */
5903 while (idxInstr == 0 && idxInstr + 1 < idxOldInstrPlusOne && idxCurCall > 0)
5904 idxInstr = pReNative->pTbOrg->Thrd.paCalls[--idxCurCall].idxInstr;
5905 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
5906 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
5907 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u\n",
5908 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped));
5909
5910 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
5911
5912# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5913 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5914 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
5915# endif
5916# endif
5917
5918# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5919 /* Allocate a temporary PC register. */
5920 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5921
5922 /* Perform the addition and store the result. */
5923 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5924 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5925# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5926 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
5927# endif
5928
5929 /* Free but don't flush the PC register. */
5930 iemNativeRegFreeTmp(pReNative, idxPcReg);
5931# else
5932 /* Compare the shadow with the context value, they should match. */
5933 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5934 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5935# endif
5936
5937 pReNative->Core.offPc = 0;
5938
5939 return off;
5940}
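/*
 * Rough shape of the emitted code in the non-debug configuration (a sketch for
 * orientation only, AMD64 flavour; the initial load is skipped when RIP is already
 * shadowed in a host register):
 *
 *     mov  rTmp, [pVCpu + cpum.GstCtx.rip]   ; via the ForUpdate guest register allocation
 *     add  rTmp, offPc
 *     mov  [pVCpu + cpum.GstCtx.rip], rTmp
 */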
5941
5942#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5943
5944
5945/*********************************************************************************************************************************
5946* Code Emitters (larger snippets) *
5947*********************************************************************************************************************************/
5948
5949/**
5950 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5951 * extending to 64-bit width.
5952 *
5953 * @returns New code buffer offset on success, UINT32_MAX on failure.
5954 * @param pReNative The native recompile state.
5955 * @param off The current code buffer position.
5956 * @param idxHstReg The host register to load the guest register value into.
5957 * @param enmGstReg The guest register to load.
5958 *
5959 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5960 * that is something the caller needs to do if applicable.
5961 */
5962DECL_HIDDEN_THROW(uint32_t)
5963iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5964{
5965 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5966 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5967
5968 switch (g_aGstShadowInfo[enmGstReg].cb)
5969 {
5970 case sizeof(uint64_t):
5971 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5972 case sizeof(uint32_t):
5973 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5974 case sizeof(uint16_t):
5975 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5976#if 0 /* not present in the table. */
5977 case sizeof(uint8_t):
5978 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5979#endif
5980 default:
5981 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5982 }
5983}
5984
5985
5986/**
5987 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5988 * extending to 64-bit width, extended version.
5989 *
5990 * @returns New code buffer offset on success, UINT32_MAX on failure.
5991 * @param pCodeBuf The code buffer.
5992 * @param off The current code buffer position.
5993 * @param idxHstReg The host register to load the guest register value into.
5994 * @param enmGstReg The guest register to load.
5995 *
5996 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5997 * that is something the caller needs to do if applicable.
5998 */
5999DECL_HIDDEN_THROW(uint32_t)
6000iemNativeEmitLoadGprWithGstShadowRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6001{
6002 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6003 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6004
6005 switch (g_aGstShadowInfo[enmGstReg].cb)
6006 {
6007 case sizeof(uint64_t):
6008 return iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6009 case sizeof(uint32_t):
6010 return iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6011 case sizeof(uint16_t):
6012 return iemNativeEmitLoadGprFromVCpuU16Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6013#if 0 /* not present in the table. */
6014 case sizeof(uint8_t):
6015 return iemNativeEmitLoadGprFromVCpuU8Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6016#endif
6017 default:
6018#ifdef IEM_WITH_THROW_CATCH
6019 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6020#else
6021 AssertReleaseFailedReturn(off);
6022#endif
6023 }
6024}
6025
6026
6027#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6028/**
6029 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6030 *
6031 * @returns New code buffer offset on success, UINT32_MAX on failure.
6032 * @param pReNative The recompiler state.
6033 * @param off The current code buffer position.
6034 * @param idxHstSimdReg The host register to load the guest register value into.
6035 * @param enmGstSimdReg The guest register to load.
6036 * @param enmLoadSz The load size of the register.
6037 *
6038 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6039 * that is something the caller needs to do if applicable.
6040 */
6041DECL_HIDDEN_THROW(uint32_t)
6042iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6043 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6044{
6045 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6046
6047 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6048 switch (enmLoadSz)
6049 {
6050 case kIemNativeGstSimdRegLdStSz_256:
6051 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6052 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6053 case kIemNativeGstSimdRegLdStSz_Low128:
6054 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6055 case kIemNativeGstSimdRegLdStSz_High128:
6056 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6057 default:
6058 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6059 }
6060}
6061#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6062
6063#ifdef VBOX_STRICT
6064
6065/**
6066 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6067 *
6068 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6069 * Trashes EFLAGS on AMD64.
6070 */
6071DECL_FORCE_INLINE(uint32_t)
6072iemNativeEmitTop32BitsClearCheckEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxReg)
6073{
6074# ifdef RT_ARCH_AMD64
6075 /* rol reg64, 32 */
6076 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6077 pCodeBuf[off++] = 0xc1;
6078 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6079 pCodeBuf[off++] = 32;
6080
6081 /* test reg32, ffffffffh */
6082 if (idxReg >= 8)
6083 pCodeBuf[off++] = X86_OP_REX_B;
6084 pCodeBuf[off++] = 0xf7;
6085 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6086 pCodeBuf[off++] = 0xff;
6087 pCodeBuf[off++] = 0xff;
6088 pCodeBuf[off++] = 0xff;
6089 pCodeBuf[off++] = 0xff;
6090
6091 /* je/jz +1 */
6092 pCodeBuf[off++] = 0x74;
6093 pCodeBuf[off++] = 0x01;
6094
6095 /* int3 */
6096 pCodeBuf[off++] = 0xcc;
6097
6098 /* rol reg64, 32 */
6099 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6100 pCodeBuf[off++] = 0xc1;
6101 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6102 pCodeBuf[off++] = 32;
6103
6104# elif defined(RT_ARCH_ARM64)
6105 /* lsr tmp0, reg64, #32 */
6106 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6107 /* cbz tmp0, +1 */
6108 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6109 /* brk #0x1100 */
6110 pCodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6111
6112# else
6113# error "Port me!"
6114# endif
6115 return off;
6116}
6117
6118
6119/**
6120 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6121 *
6122 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6123 * Trashes EFLAGS on AMD64.
6124 */
6125DECL_HIDDEN_THROW(uint32_t)
6126iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6127{
6128# ifdef RT_ARCH_AMD64
6129 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6130# elif defined(RT_ARCH_ARM64)
6131 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6132# else
6133# error "Port me!"
6134# endif
6135 off = iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6136 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6137 return off;
6138}
6139
6140
6141/**
6142 * Emitting code that checks that the content of register @a idxReg is the same
6143 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6144 * instruction if that's not the case.
6145 *
6146 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6147 * Trashes EFLAGS on AMD64.
6148 */
6149DECL_HIDDEN_THROW(uint32_t) iemNativeEmitGuestRegValueCheckEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf,
6150 uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6151{
6152#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6153 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6154 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
6155 return off;
6156#endif
6157
6158# ifdef RT_ARCH_AMD64
6159 /* cmp reg, [mem] */
6160 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6161 {
6162 if (idxReg >= 8)
6163 pCodeBuf[off++] = X86_OP_REX_R;
6164 pCodeBuf[off++] = 0x38;
6165 }
6166 else
6167 {
6168 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6169 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6170 else
6171 {
6172 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6173 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6174 else
6175 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6176 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6177 if (idxReg >= 8)
6178 pCodeBuf[off++] = X86_OP_REX_R;
6179 }
6180 pCodeBuf[off++] = 0x39;
6181 }
6182 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6183
6184 /* je/jz +1 */
6185 pCodeBuf[off++] = 0x74;
6186 pCodeBuf[off++] = 0x01;
6187
6188 /* int3 */
6189 pCodeBuf[off++] = 0xcc;
6190
6191 /* For values smaller than the register size, we must check that the rest
6192 of the register is all zeros. */
6193 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6194 {
6195 /* test reg64, imm32 */
6196 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6197 pCodeBuf[off++] = 0xf7;
6198 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6199 pCodeBuf[off++] = 0;
6200 pCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6201 pCodeBuf[off++] = 0xff;
6202 pCodeBuf[off++] = 0xff;
6203
6204 /* je/jz +1 */
6205 pCodeBuf[off++] = 0x74;
6206 pCodeBuf[off++] = 0x01;
6207
6208 /* int3 */
6209 pCodeBuf[off++] = 0xcc;
6210 }
6211 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6212 iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6213 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6214
6215# elif defined(RT_ARCH_ARM64)
6216 /* mov TMP0, [gstreg] */
6217 off = iemNativeEmitLoadGprWithGstShadowRegEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6218
6219 /* sub tmp0, tmp0, idxReg */
6220 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6221 /* cbz tmp0, +2 */
6222 pCodeBuf[off++] = Armv8A64MkInstrCbz(2, IEMNATIVE_REG_FIXED_TMP0);
6223 /* brk #0x1000+enmGstReg */
6224 pCodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6225 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6226
6227# else
6228# error "Port me!"
6229# endif
6230 return off;
6231}
6232
6233
6234/**
6235 * Emitting code that checks that the content of register @a idxReg is the same
6236 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6237 * instruction if that's not the case.
6238 *
6239 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6240 * Trashes EFLAGS on AMD64.
6241 */
6242DECL_HIDDEN_THROW(uint32_t)
6243iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6244{
6245#ifdef RT_ARCH_AMD64
6246 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6247#elif defined(RT_ARCH_ARM64)
6248 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6249# else
6250# error "Port me!"
6251# endif
6252 return iemNativeEmitGuestRegValueCheckEx(pReNative, pCodeBuf, off, idxReg, enmGstReg);
6253}
6254
6255# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6256# ifdef RT_ARCH_AMD64
6257/**
6258 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
6259 */
6260DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6261{
6262 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6263 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6264 if (idxSimdReg >= 8)
6265 pbCodeBuf[off++] = X86_OP_REX_R;
6266 pbCodeBuf[off++] = 0x0f;
6267 pbCodeBuf[off++] = 0x38;
6268 pbCodeBuf[off++] = 0x29;
6269 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6270
6271 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6272 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6273 pbCodeBuf[off++] = X86_OP_REX_W
6274 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6275 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6276 pbCodeBuf[off++] = 0x0f;
6277 pbCodeBuf[off++] = 0x3a;
6278 pbCodeBuf[off++] = 0x16;
6279 pbCodeBuf[off++] = 0xeb;
6280 pbCodeBuf[off++] = 0x00;
6281
6282 /* cmp tmp0, 0xffffffffffffffff. */
6283 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6284 pbCodeBuf[off++] = 0x83;
6285 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6286 pbCodeBuf[off++] = 0xff;
6287
6288 /* je/jz +1 */
6289 pbCodeBuf[off++] = 0x74;
6290 pbCodeBuf[off++] = 0x01;
6291
6292 /* int3 */
6293 pbCodeBuf[off++] = 0xcc;
6294
6295 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6296 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6297 pbCodeBuf[off++] = X86_OP_REX_W
6298 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6299 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6300 pbCodeBuf[off++] = 0x0f;
6301 pbCodeBuf[off++] = 0x3a;
6302 pbCodeBuf[off++] = 0x16;
6303 pbCodeBuf[off++] = 0xeb;
6304 pbCodeBuf[off++] = 0x01;
6305
6306 /* cmp tmp0, 0xffffffffffffffff. */
6307 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6308 pbCodeBuf[off++] = 0x83;
6309 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6310 pbCodeBuf[off++] = 0xff;
6311
6312 /* je/jz +1 */
6313 pbCodeBuf[off++] = 0x74;
6314 pbCodeBuf[off++] = 0x01;
6315
6316 /* int3 */
6317 pbCodeBuf[off++] = 0xcc;
6318
6319 return off;
6320}
6321# endif
6322
6323
6324/**
6325 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6326 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6327 * instruction if that's not the case.
6328 *
6329 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6330 * Trashes EFLAGS on AMD64.
6331 */
6332DECL_HIDDEN_THROW(uint32_t)
6333iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6334 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6335{
6336 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6337 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6338 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6339 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6340 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6341 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6342 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6343 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6344 return off;
6345
6346# ifdef RT_ARCH_AMD64
6347 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6348 {
6349 /* movdqa vectmp0, idxSimdReg */
6350 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6351
6352 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6353
6354 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6355 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6356 }
6357
6358 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6359 {
6360 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6361 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6362
6363 /* vextracti128 vectmp0, idxSimdReg, 1 */
6364 pbCodeBuf[off++] = X86_OP_VEX3;
6365 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6366 | X86_OP_VEX3_BYTE1_X
6367 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6368 | 0x03; /* Opcode map */
6369 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6370 pbCodeBuf[off++] = 0x39;
6371 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6372 pbCodeBuf[off++] = 0x01;
6373
6374 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6375 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6376 }
6377# elif defined(RT_ARCH_ARM64)
6378 /* mov vectmp0, [gstreg] */
6379 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6380
6381 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6382 {
6383 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6384 /* eor vectmp0, vectmp0, idxSimdReg */
6385 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6386 /* uaddlv vectmp0, vectmp0.16B */
6387 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6388 /* umov tmp0, vectmp0.H[0] */
6389 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6390 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6391 /* cbz tmp0, +1 */
6392 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6393 /* brk #0x1000+enmGstReg */
6394 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6395 }
6396
6397 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6398 {
6399 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6400 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6401 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6402 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6403 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6404 /* umov tmp0, (vectmp0 + 1).H[0] */
6405 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6406 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6407 /* cbz tmp0, +1 */
6408 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6409 /* brk #0x1000+enmGstReg */
6410 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6411 }
6412
6413# else
6414# error "Port me!"
6415# endif
6416
6417 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6418 return off;
6419}
6420# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6421
6422
6423/**
6424 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6425 * important bits.
6426 *
6427 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6428 * Trashes EFLAGS on AMD64.
6429 */
6430DECL_HIDDEN_THROW(uint32_t)
6431iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6432{
6433 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6434 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6435 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6436 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6437
6438#ifdef RT_ARCH_AMD64
6439 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6440
6441 /* je/jz +1 */
6442 pbCodeBuf[off++] = 0x74;
6443 pbCodeBuf[off++] = 0x01;
6444
6445 /* int3 */
6446 pbCodeBuf[off++] = 0xcc;
6447
6448# elif defined(RT_ARCH_ARM64)
6449 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6450
6451 /* b.eq +1 */
6452 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6453 /* brk #0x2000 */
6454 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6455
6456# else
6457# error "Port me!"
6458# endif
6459 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6460
6461 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6462 return off;
6463}
6464
6465#endif /* VBOX_STRICT */
6466
6467
6468#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6469/**
6470 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6471 */
6472DECL_HIDDEN_THROW(uint32_t)
6473iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6474{
6475 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6476
6477 fEflNeeded &= X86_EFL_STATUS_BITS;
6478 if (fEflNeeded)
6479 {
6480# ifdef RT_ARCH_AMD64
6481 /* test dword [pVCpu + offVCpu], imm32 */
6482 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
6483 if (fEflNeeded <= 0xff)
6484 {
6485 pCodeBuf[off++] = 0xf6;
6486 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6487 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6488 }
6489 else
6490 {
6491 pCodeBuf[off++] = 0xf7;
6492 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6493 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6494 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6495 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6496 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6497 }
6498
6499 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 3, kIemNativeInstrCond_e);
6500 pCodeBuf[off++] = 0xcc;
6501
6502 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6503
6504# else
6505 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6506 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6507 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6508# ifdef RT_ARCH_ARM64
6509 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6510 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6511# else
6512# error "Port me!"
6513# endif
6514 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6515# endif
6516 }
6517 return off;
6518}
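/*
 * Typical invocation is via the IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK wrapper,
 * as seen in the call emitters below, e.g.:
 *
 *     IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
 */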
6519#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6520
6521
6522/**
6523 * Emits code for checking the return code of a call and rcPassUp, returning
6524 * from the code if either is non-zero.
6525 */
6526DECL_HIDDEN_THROW(uint32_t)
6527iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6528{
6529#ifdef RT_ARCH_AMD64
6530 /*
6531 * AMD64: eax = call status code.
6532 */
6533
6534 /* edx = rcPassUp */
6535 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6536# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6537 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6538# endif
6539
6540 /* edx = eax | rcPassUp */
6541 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6542 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6543 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6544 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6545
6546 /* Jump to non-zero status return path. */
6547 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, off);
6548
6549 /* done. */
6550
6551#elif RT_ARCH_ARM64
6552 /*
6553 * ARM64: w0 = call status code.
6554 */
6555 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+3+3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
6556
6557# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6558 AssertCompile(ARMV8_A64_REG_X2 == IEMNATIVE_CALL_ARG2_GREG);
6559 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, ARMV8_A64_REG_X2, idxInstr);
6560# endif
6561 off = iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6562
6563 pCodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6564
6565 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, pCodeBuf, off,
6566 ARMV8_A64_REG_X4, true /*f64Bit*/);
6567
6568#else
6569# error "port me"
6570#endif
6571 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6572 RT_NOREF_PV(idxInstr);
6573 return off;
6574}
6575
6576
6577/**
6578 * Emits a call to a CImpl function or something similar.
6579 */
6580DECL_HIDDEN_THROW(uint32_t)
6581iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6582 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6583{
6584 /* Writeback everything. */
6585 off = iemNativeRegFlushPendingWrites(pReNative, off);
6586
6587 /*
6588 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6589 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6590 */
6591 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6592 fGstShwFlush
6593 | RT_BIT_64(kIemNativeGstReg_Pc)
6594 | RT_BIT_64(kIemNativeGstReg_EFlags));
6595 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6596
6597 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6598
6599 /*
6600 * Load the parameters.
6601 */
6602#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6603 /* Special handling for the hidden VBOXSTRICTRC pointer. */
6604 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6605 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6606 if (cAddParams > 0)
6607 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6608 if (cAddParams > 1)
6609 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6610 if (cAddParams > 2)
6611 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6612 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6613
6614#else
6615 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6616 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6617 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6618 if (cAddParams > 0)
6619 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6620 if (cAddParams > 1)
6621 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6622 if (cAddParams > 2)
6623# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6624 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6625# else
6626 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6627# endif
6628#endif
6629
6630 /*
6631 * Make the call.
6632 */
6633 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6634
6635#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6636 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6637#endif
6638
6639#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6640 pReNative->Core.fDebugPcInitialized = false;
6641 Log4(("fDebugPcInitialized=false cimpl off=%#x (v2)\n", off));
6642#endif
6643
6644 /*
6645 * Check the status code.
6646 */
6647 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6648}
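/*
 * Illustrative sketch (parameter values are placeholders, not taken from a real call
 * site): a defer-to-CImpl style invocation with one extra parameter would look roughly
 * like this:
 *
 *     off = iemNativeEmitCImplCall(pReNative, off, idxInstr, fGstShwFlush,
 *                                  (uintptr_t)pfnCImpl, cbInstr, 1 /*cAddParams*/,
 *                                  uParam0, 0, 0);
 */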
6649
6650
6651/**
6652 * Emits a call to a threaded worker function.
6653 */
6654DECL_HIDDEN_THROW(uint32_t)
6655iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6656{
6657 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
6658 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6659
6660 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6661 off = iemNativeRegFlushPendingWrites(pReNative, off);
6662
6663 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6664 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6665
6666#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6667 /* The threaded function may throw / long jmp, so set current instruction
6668 number if we're counting. */
6669 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6670#endif
6671
6672 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6673
6674#ifdef RT_ARCH_AMD64
6675 /* Load the parameters and emit the call. */
6676# ifdef RT_OS_WINDOWS
6677# ifndef VBOXSTRICTRC_STRICT_ENABLED
6678 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6679 if (cParams > 0)
6680 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6681 if (cParams > 1)
6682 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6683 if (cParams > 2)
6684 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6685# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6686 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6687 if (cParams > 0)
6688 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6689 if (cParams > 1)
6690 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6691 if (cParams > 2)
6692 {
6693 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6694 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6695 }
6696 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6697# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6698# else
6699 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6700 if (cParams > 0)
6701 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6702 if (cParams > 1)
6703 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6704 if (cParams > 2)
6705 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6706# endif
6707
6708 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6709
6710# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6711 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6712# endif
6713
6714#elif RT_ARCH_ARM64
6715 /*
6716 * ARM64:
6717 */
6718 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6719 if (cParams > 0)
6720 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6721 if (cParams > 1)
6722 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6723 if (cParams > 2)
6724 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6725
6726 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6727
6728#else
6729# error "port me"
6730#endif
6731
6732#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6733 pReNative->Core.fDebugPcInitialized = false;
6734 Log4(("fDebugPcInitialized=false todo off=%#x (v2)\n", off));
6735#endif
6736
6737 /*
6738 * Check the status code.
6739 */
6740 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6741
6742 return off;
6743}
6744
6745
6746/**
6747 * The default liveness function, matching iemNativeEmitThreadedCall.
6748 */
6749IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_ThreadedCall)
6750{
6751 IEM_LIVENESS_RAW_INIT_WITH_CALL(pOutgoing, pIncoming);
6752 RT_NOREF(pCallEntry);
6753}
6754
6755#ifdef VBOX_WITH_STATISTICS
6756
6757/**
6758 * Emits code to update the threaded function call statistics.
6759 */
6760DECL_INLINE_THROW(uint32_t)
6761iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6762{
6763 /*
6764 * Update threaded function stats.
6765 */
6766 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6767 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6768# if defined(RT_ARCH_ARM64)
6769 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6770 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6771 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6772 iemNativeRegFreeTmp(pReNative, idxTmp1);
6773 iemNativeRegFreeTmp(pReNative, idxTmp2);
6774# else
6775 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6776# endif
6777 return off;
6778}
6779
6780
6781/**
6782 * Emits code to update the TB exit reason statistics.
6783 */
6784DECL_INLINE_THROW(uint32_t)
6785iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6786{
6787 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6788 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6789 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6790 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6791 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6792
6793 return off;
6794}
6795
6796#endif /* VBOX_WITH_STATISTICS */
6797
6798/**
6799 * Worker for iemNativeEmitViaLookupDoOne and iemNativeRecompileAttachExecMemChunkCtx.
6800 */
6801static uint32_t
6802iemNativeEmitCoreViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak, uintptr_t pfnHelper)
6803{
6804 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6805 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6806
6807 /* Jump to ReturnBreak if the return register is NULL. */
6808 off = iemNativeEmitTestIfGprIsZeroAndJmpToFixed(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6809 true /*f64Bit*/, offReturnBreak);
6810
6811 /* Okay, continue executing the next TB. */
6812 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6813 return off;
6814}
6815
6816
6817/**
6818 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6819 */
6820static uint32_t iemNativeEmitCoreReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6821{
6822 /* set the return status */
6823 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6824}
6825
6826
6827/**
6828 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6829 */
6830static uint32_t iemNativeEmitCoreReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6831{
6832 /* set the return status */
6833 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6834}
6835
6836
6837/**
6838 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6839 */
6840static uint32_t iemNativeEmitCoreReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6841{
6842 /* set the return status */
6843 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6844}
6845
6846
6847/**
6848 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6849 */
6850static uint32_t iemNativeEmitCoreRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6851{
6852 /*
6853 * Generate the rc + rcPassUp fiddling code.
6854 */
6855 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6856#ifdef RT_ARCH_AMD64
6857# ifdef RT_OS_WINDOWS
6858# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6859 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6860# endif
6861 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6862 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6863# else
6864 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6865 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6866# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6867 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6868# endif
6869# endif
6870# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6871 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6872# endif
6873
6874#else
6875 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6876 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6877 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6878#endif
6879
6880 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6881 return off;
6882}
6883
6884
6885/**
6886 * Emits a standard epilog.
6887 */
6888static uint32_t iemNativeEmitCoreEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6889{
6890 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6891
6892 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
6893 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6894
6895 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6896 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6897
6898 /*
6899 * Restore registers and return.
6900 */
6901#ifdef RT_ARCH_AMD64
6902 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6903
6904     /* Reposition rsp at the r15 restore point. */
6905 pbCodeBuf[off++] = X86_OP_REX_W;
6906 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6907 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6908 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6909
6910 /* Pop non-volatile registers and return */
6911 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6912 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6913 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6914 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6915 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6916 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6917 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6918 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6919# ifdef RT_OS_WINDOWS
6920 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6921 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6922# endif
6923 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6924 pbCodeBuf[off++] = 0xc9; /* leave */
6925 pbCodeBuf[off++] = 0xc3; /* ret */
6926 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6927
6928#elif RT_ARCH_ARM64
6929 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6930
6931     /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6932 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6933 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6934 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6935 IEMNATIVE_FRAME_VAR_SIZE / 8);
6936 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6937 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6938 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6939 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6940 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6941 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6942 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6943 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6944 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6945 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6946 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6947 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6948
6949 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6950 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6951 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6952 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6953
6954 /* retab / ret */
6955# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6956 if (1)
6957 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6958 else
6959# endif
6960 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6961
6962#else
6963# error "port me"
6964#endif
6965 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6966
6967 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
6968 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6969
6970 return off;
6971}
6972
6973
6974
6975/*********************************************************************************************************************************
6976* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6977*********************************************************************************************************************************/
6978
6979/**
6980 * Internal work that allocates a variable with kind set to
6981 * kIemNativeVarKind_Invalid and no current stack allocation.
6982 *
6983 * The kind will either be set by the caller or later when the variable is first
6984 * assigned a value.
6985 *
6986 * @returns Unpacked index.
6987 * @internal
6988 */
6989static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6990{
6991 Assert(cbType > 0 && cbType <= 64);
6992 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6993 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6994 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6995 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6996 pReNative->Core.aVars[idxVar].cbVar = cbType;
6997 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6998 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6999 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7000 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7001 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7002 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7003 pReNative->Core.aVars[idxVar].u.uValue = 0;
7004#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7005 pReNative->Core.aVars[idxVar].fSimdReg = false;
7006#endif
7007 return idxVar;
7008}
7009
7010
7011/**
7012 * Internal work that allocates an argument variable w/o setting enmKind.
7013 *
7014 * @returns Unpacked index.
7015 * @internal
7016 */
7017static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7018{
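    /* The caller's argument numbers do not include the hidden arguments, so shift accordingly. */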
7019 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7020 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7021 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7022
7023 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7024 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7025 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7026 return idxVar;
7027}
7028
7029
7030/**
7031 * Gets the stack slot for a stack variable, allocating one if necessary.
7032 *
7033 * Calling this function implies that the stack slot will contain a valid
7034 * variable value. The caller deals with any register currently assigned to the
7035 * variable, typically by spilling it into the stack slot.
7036 *
7037 * @returns The stack slot number.
7038 * @param pReNative The recompiler state.
7039 * @param idxVar The variable.
7040 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7041 */
7042DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7043{
7044 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7045 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7046 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7047
7048 /* Already got a slot? */
7049 uint8_t const idxStackSlot = pVar->idxStackSlot;
7050 if (idxStackSlot != UINT8_MAX)
7051 {
7052 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7053 return idxStackSlot;
7054 }
7055
7056 /*
7057 * A single slot is easy to allocate.
7058 * Allocate them from the top end, closest to BP, to reduce the displacement.
7059 */
7060 if (pVar->cbVar <= sizeof(uint64_t))
7061 {
7062 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7063 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7064 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7065 pVar->idxStackSlot = (uint8_t)iSlot;
7066 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7067 return (uint8_t)iSlot;
7068 }
7069
7070 /*
7071 * We need more than one stack slot.
7072 *
7073 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7074 */
7075 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7076 Assert(pVar->cbVar <= 64);
7077 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
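    /* fBitAllocMask: one bit per 8-byte slot needed: cbVar 16 -> 0x3; 32 -> 0xf; 64 -> 0xff. */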
7078 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7079 uint32_t bmStack = pReNative->Core.bmStack;
7080 while (bmStack != UINT32_MAX)
7081 {
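        /* Take the highest free slot, align it down to the variable's natural boundary and
           check that the whole run is free; if not, mark the run as taken in the local copy
           so the next iteration searches further down. */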
7082 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7083 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7084 iSlot = (iSlot - 1) & ~fBitAlignMask;
7085 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7086 {
7087 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7088 pVar->idxStackSlot = (uint8_t)iSlot;
7089 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7090 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7091 return (uint8_t)iSlot;
7092 }
7093
7094 bmStack |= (fBitAllocMask << iSlot);
7095 }
7096 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7097}
7098
7099
7100/**
7101 * Changes the variable to a stack variable.
7102 *
7103 * Currently this is only possible to do the first time the variable is used;
7104 * switching later could be implemented but hasn't been done.
7105 *
7106 * @param pReNative The recompiler state.
7107 * @param idxVar The variable.
7108 * @throws VERR_IEM_VAR_IPE_2
7109 */
7110DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7111{
7112 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7113 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7114 if (pVar->enmKind != kIemNativeVarKind_Stack)
7115 {
7116 /* We could in theory transition from immediate to stack as well, but it
7117 would involve the caller doing work storing the value on the stack. So,
7118 till that's required we only allow transition from invalid. */
7119 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7120 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7121 pVar->enmKind = kIemNativeVarKind_Stack;
7122
7123 /* Note! We don't allocate a stack slot here, that's only done when a
7124 slot is actually needed to hold a variable value. */
7125 }
7126}
7127
7128
7129/**
7130 * Sets the variable to a constant value.
7131 *
7132 * This does not require stack storage as we know the value and can always
7133 * reload it, unless of course it's referenced.
7134 *
7135 * @param pReNative The recompiler state.
7136 * @param idxVar The variable.
7137 * @param uValue The immediate value.
7138 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7139 */
7140DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7141{
7142 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7143 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7144 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7145 {
7146 /* Only simple transitions for now. */
7147 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7148 pVar->enmKind = kIemNativeVarKind_Immediate;
7149 }
7150 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7151
7152 pVar->u.uValue = uValue;
7153 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7154 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7155 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7156}
7157
7158
7159/**
7160 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7161 *
7162 * This does not require stack storage as we know the value and can always
7163 * reload it. Loading is postponed till needed.
7164 *
7165 * @param pReNative The recompiler state.
7166 * @param idxVar The variable. Unpacked.
7167 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7168 *
7169 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7170 * @internal
7171 */
7172static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7173{
7174 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7175 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7176
7177 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7178 {
7179 /* Only simple transitions for now. */
7180 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7181 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7182 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7183 }
7184 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7185
7186 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7187
7188 /* Update the other variable, ensure it's a stack variable. */
7189 /** @todo handle variables with const values... that'll go boom now. */
7190 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7191 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7192}
7193
7194
7195/**
7196 * Sets the variable to a reference (pointer) to a guest register reference.
7197 *
7198 * This does not require stack storage as we know the value and can always
7199 * reload it. Loading is postponed till needed.
7200 *
7201 * @param pReNative The recompiler state.
7202 * @param idxVar The variable.
7203 * @param   enmRegClass The class of guest registers to reference.
7204 * @param idxReg The register within @a enmRegClass to reference.
7205 *
7206 * @throws VERR_IEM_VAR_IPE_2
7207 */
7208DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7209 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7210{
7211 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7212 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7213
7214 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7215 {
7216 /* Only simple transitions for now. */
7217 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7218 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7219 }
7220 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7221
7222 pVar->u.GstRegRef.enmClass = enmRegClass;
7223 pVar->u.GstRegRef.idx = idxReg;
7224}
7225
7226
7227DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7228{
7229 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7230}
7231
7232
7233DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7234{
7235 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7236
7237     /* Since we're using a generic uint64_t value type, we must truncate it if
7238        the variable is smaller, otherwise we may end up with a too large value when
7239        scaling up an imm8 w/ sign-extension.
7240
7241 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7242        in the bios, bx=1) when running on arm, because clang expects 16-bit
7243 register parameters to have bits 16 and up set to zero. Instead of
7244 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7245 CF value in the result. */
7246 switch (cbType)
7247 {
7248 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7249 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7250 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7251 }
7252 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7253 return idxVar;
7254}
7255
7256
7257DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7258{
7259 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7260 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7261 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7262 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7263 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7264 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7265
7266 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7267 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7268 return idxArgVar;
7269}
7270
7271
7272DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7273{
7274 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7275 /* Don't set to stack now, leave that to the first use as for instance
7276 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7277 return idxVar;
7278}
7279
7280
7281DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7282{
7283 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7284
7285     /* Since we're using a generic uint64_t value type, we must truncate it if
7286        the variable is smaller, otherwise we may end up with a too large value when
7287        scaling up an imm8 w/ sign-extension. */
7288 switch (cbType)
7289 {
7290 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7291 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7292 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7293 }
7294 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7295 return idxVar;
7296}
7297
7298
7299DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
7300 uint8_t cbType, uint8_t idxVarOther)
7301{
7302 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7303 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7304
7305 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
7306 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7307
7308/** @todo combine MOV and AND using MOVZX/similar. */
7309 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7310
7311     /* Truncate the value to this variable's size. */
7312 switch (cbType)
7313 {
7314 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7315 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7316 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7317 }
7318
7319 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7320 iemNativeVarRegisterRelease(pReNative, idxVar);
7321 return idxVar;
7322}
7323
7324
7325/**
7326 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7327 * fixed till we call iemNativeVarRegisterRelease.
7328 *
7329 * @returns The host register number.
7330 * @param pReNative The recompiler state.
7331 * @param idxVar The variable.
7332 * @param poff Pointer to the instruction buffer offset.
7333 * In case a register needs to be freed up or the value
7334 * loaded off the stack.
7335 * @param fInitialized Set if the variable must already have been
7336 * initialized. Will throw VERR_IEM_VAR_NOT_INITIALIZED
7337 * if this is not the case.
7338 * @param idxRegPref Preferred register number or UINT8_MAX.
7339 *
7340 * @note Must not modify the host status flags!
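 *
 * @note    Illustrative usage sketch (not lifted from any specific call site):
 * @code
 *      uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true); // fInitialized=true
 *      // ... emit instructions that read or write idxReg here ...
 *      iemNativeVarRegisterRelease(pReNative, idxVar);
 * @endcode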
7341 */
7342DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7343 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7344{
7345 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7346 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7347 Assert(pVar->cbVar <= 8);
7348 Assert(!pVar->fRegAcquired);
7349
7350 uint8_t idxReg = pVar->idxReg;
7351 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7352 {
7353 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7354 && pVar->enmKind < kIemNativeVarKind_End);
7355 pVar->fRegAcquired = true;
7356 return idxReg;
7357 }
7358
7359 /*
7360 * If the kind of variable has not yet been set, default to 'stack'.
7361 */
7362 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7363 && pVar->enmKind < kIemNativeVarKind_End);
7364 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7365 iemNativeVarSetKindToStack(pReNative, idxVar);
7366
7367 /*
7368     * We have to allocate a register for the variable, even if it's a stack one,
7369     * as we don't know whether modifications are being made to it before it's
7370     * finalized (todo: analyze and insert hints about that?).
7371     *
7372     * If we can, we try to get the correct register for argument variables. This
7373     * assumes that most argument variables are fetched as close as possible
7374     * to the actual call, so that there aren't any interfering hidden calls
7375     * (memory accesses, etc.) in between.
7376     *
7377     * If we cannot, or it's a local variable, we make sure no argument registers
7378     * that will be used by this MC block are allocated here, and we always
7379     * prefer non-volatile registers to avoid needing to spill stuff for an
7380     * internal call.
7381 */
7382 /** @todo Detect too early argument value fetches and warn about hidden
7383 * calls causing less optimal code to be generated in the python script. */
7384
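    /* If this is an argument variable and its designated call register is still free, pick that one. */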
7385 uint8_t const uArgNo = pVar->uArgNo;
7386 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7387 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7388 {
7389 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7390
7391#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7392 /* Writeback any dirty shadow registers we are about to unshadow. */
7393 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7394#endif
7395
7396 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7397 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7398 }
7399 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7400 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7401 {
7402 /** @todo there must be a better way for this and boot cArgsX? */
7403 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7404 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7405 & ~pReNative->Core.bmHstRegsWithGstShadow
7406 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7407 & fNotArgsMask;
7408 if (fRegs)
7409 {
7410 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7411 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7412 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7413 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7414 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7415 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7416 }
7417 else
7418 {
7419 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7420 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7421 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7422 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7423 }
7424 }
7425 else
7426 {
7427 idxReg = idxRegPref;
7428 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7429 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7430 }
7431 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7432 pVar->idxReg = idxReg;
7433
7434#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7435 pVar->fSimdReg = false;
7436#endif
7437
7438 /*
7439 * Load it off the stack if we've got a stack slot.
7440 */
7441 uint8_t const idxStackSlot = pVar->idxStackSlot;
7442 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7443 {
7444 Assert(fInitialized);
7445 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7446 switch (pVar->cbVar)
7447 {
7448 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7449 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7450 case 3: AssertFailed(); RT_FALL_THRU();
7451 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7452 default: AssertFailed(); RT_FALL_THRU();
7453 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7454 }
7455 }
7456 else
7457 {
7458 Assert(idxStackSlot == UINT8_MAX);
7459 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7460 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7461 else
7462 {
7463 /*
7464 * Convert from immediate to stack/register. This is currently only
7465 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7466 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7467 */
7468 AssertStmt(fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7469 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7470 idxVar, idxReg, pVar->u.uValue));
7471 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7472 pVar->enmKind = kIemNativeVarKind_Stack;
7473 }
7474 }
7475
7476 pVar->fRegAcquired = true;
7477 return idxReg;
7478}
7479
7480
7481#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7482/**
7483 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7484 * fixed till we call iemNativeVarRegisterRelease.
7485 *
7486 * @returns The host register number.
7487 * @param pReNative The recompiler state.
7488 * @param idxVar The variable.
7489 * @param poff Pointer to the instruction buffer offset.
7490 * In case a register needs to be freed up or the value
7491 * loaded off the stack.
7492 * @param fInitialized Set if the variable must already have been initialized.
7493 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7494 * the case.
7495 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7496 */
7497DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7498 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7499{
7500 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7501 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7502 Assert( pVar->cbVar == sizeof(RTUINT128U)
7503 || pVar->cbVar == sizeof(RTUINT256U));
7504 Assert(!pVar->fRegAcquired);
7505
7506 uint8_t idxReg = pVar->idxReg;
7507 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7508 {
7509 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7510 && pVar->enmKind < kIemNativeVarKind_End);
7511 pVar->fRegAcquired = true;
7512 return idxReg;
7513 }
7514
7515 /*
7516 * If the kind of variable has not yet been set, default to 'stack'.
7517 */
7518 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7519 && pVar->enmKind < kIemNativeVarKind_End);
7520 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7521 iemNativeVarSetKindToStack(pReNative, idxVar);
7522
7523 /*
7524     * We have to allocate a register for the variable, even if it's a stack one,
7525     * as we don't know whether modifications are being made to it before it's
7526     * finalized (todo: analyze and insert hints about that?).
7527     *
7528     * If we can, we try to get the correct register for argument variables. This
7529     * assumes that most argument variables are fetched as close as possible
7530     * to the actual call, so that there aren't any interfering hidden calls
7531     * (memory accesses, etc.) in between.
7532     *
7533     * If we cannot, or it's a local variable, we make sure no argument registers
7534     * that will be used by this MC block are allocated here, and we always
7535     * prefer non-volatile registers to avoid needing to spill stuff for an
7536     * internal call.
7537 */
7538 /** @todo Detect too early argument value fetches and warn about hidden
7539 * calls causing less optimal code to be generated in the python script. */
7540
7541 uint8_t const uArgNo = pVar->uArgNo;
7542 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7543
7544     /* SIMD is a bit simpler for now because there is no support for arguments. */
7545 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7546 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7547 {
7548 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7549 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7550 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7551 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7552 & fNotArgsMask;
7553 if (fRegs)
7554 {
7555 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7556 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7557 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7558 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7559 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7560 }
7561 else
7562 {
7563 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7564 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7565 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7566 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7567 }
7568 }
7569 else
7570 {
7571 idxReg = idxRegPref;
7572 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7573 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7574 }
7575 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7576
7577 pVar->fSimdReg = true;
7578 pVar->idxReg = idxReg;
7579
7580 /*
7581 * Load it off the stack if we've got a stack slot.
7582 */
7583 uint8_t const idxStackSlot = pVar->idxStackSlot;
7584 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7585 {
7586 Assert(fInitialized);
7587 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7588 switch (pVar->cbVar)
7589 {
7590 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7591 default: AssertFailed(); RT_FALL_THRU();
7592 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7593 }
7594 }
7595 else
7596 {
7597 Assert(idxStackSlot == UINT8_MAX);
7598 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7599 }
7600 pVar->fRegAcquired = true;
7601 return idxReg;
7602}
7603#endif
7604
7605
7606/**
7607 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7608 * guest register.
7609 *
7610 * This function makes sure there is a register for it and sets it to be the
7611 * current shadow copy of @a enmGstReg.
7612 *
7613 * @returns The host register number.
7614 * @param pReNative The recompiler state.
7615 * @param idxVar The variable.
7616 * @param enmGstReg The guest register this variable will be written to
7617 * after this call.
7618 * @param poff Pointer to the instruction buffer offset.
7619 * In case a register needs to be freed up or if the
7620 * variable content needs to be loaded off the stack.
7621 *
7622 * @note    We DO NOT expect @a idxVar to be an argument variable,
7623 *          because this function is only used in the commit stage of an
7624 *          instruction.
7625 */
7626DECL_HIDDEN_THROW(uint8_t)
7627iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7628{
7629 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7630 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7631 Assert(!pVar->fRegAcquired);
7632 AssertMsgStmt( pVar->cbVar <= 8
7633 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7634 || pVar->enmKind == kIemNativeVarKind_Stack),
7635 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7636 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7637 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7638
7639 /*
7640 * This shouldn't ever be used for arguments, unless it's in a weird else
7641 * branch that doesn't do any calling and even then it's questionable.
7642 *
7643 * However, in case someone writes crazy wrong MC code and does register
7644 * updates before making calls, just use the regular register allocator to
7645 * ensure we get a register suitable for the intended argument number.
7646 */
7647 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7648
7649 /*
7650 * If there is already a register for the variable, we transfer/set the
7651 * guest shadow copy assignment to it.
7652 */
7653 uint8_t idxReg = pVar->idxReg;
7654 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7655 {
7656#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7657 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7658 {
7659# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7660 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7661 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7662# endif
7663 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7664 }
7665#endif
7666
7667 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7668 {
7669 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7670 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7671 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7672 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7673 }
7674 else
7675 {
7676 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7677 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7678 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7679 }
7680 /** @todo figure this one out. We need some way of making sure the register isn't
7681 * modified after this point, just in case we start writing crappy MC code. */
7682 pVar->enmGstReg = enmGstReg;
7683 pVar->fRegAcquired = true;
7684 return idxReg;
7685 }
7686 Assert(pVar->uArgNo == UINT8_MAX);
7687
7688 /*
7689     * Because this is supposed to be the commit stage, we just tag along with the
7690     * temporary register allocator and upgrade the register to a variable register.
7691 */
7692 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7693 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7694 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7695 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7696 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7697 pVar->idxReg = idxReg;
7698
7699 /*
7700 * Now we need to load the register value.
7701 */
7702 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7703 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7704 else
7705 {
7706 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7707 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7708 switch (pVar->cbVar)
7709 {
7710 case sizeof(uint64_t):
7711 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7712 break;
7713 case sizeof(uint32_t):
7714 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7715 break;
7716 case sizeof(uint16_t):
7717 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7718 break;
7719 case sizeof(uint8_t):
7720 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7721 break;
7722 default:
7723 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7724 }
7725 }
7726
7727 pVar->fRegAcquired = true;
7728 return idxReg;
7729}
7730
7731
7732/**
7733 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7734 *
7735 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7736 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7737 * requirement of flushing anything in volatile host registers when making a
7738 * call.
7739 *
7740 * @returns New @a off value.
7741 * @param pReNative The recompiler state.
7742 * @param off The code buffer position.
7743 * @param fHstRegsNotToSave Set of registers not to save & restore.
7744 */
7745DECL_HIDDEN_THROW(uint32_t)
7746iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7747{
7748 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
7749 if (fHstRegs)
7750 {
7751 do
7752 {
7753 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7754 fHstRegs &= ~RT_BIT_32(idxHstReg);
7755
7756 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7757 {
7758 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7759 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7760 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7761 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7762 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7763 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7764 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7765 {
7766 case kIemNativeVarKind_Stack:
7767 {
7768 /* Temporarily spill the variable register. */
7769 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7770 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7771 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7772 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7773 continue;
7774 }
7775
7776 case kIemNativeVarKind_Immediate:
7777 case kIemNativeVarKind_VarRef:
7778 case kIemNativeVarKind_GstRegRef:
7779 /* It is weird to have any of these loaded at this point. */
7780 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7781 continue;
7782
7783 case kIemNativeVarKind_End:
7784 case kIemNativeVarKind_Invalid:
7785 break;
7786 }
7787 AssertFailed();
7788 }
7789 else
7790 {
7791 /*
7792 * Allocate a temporary stack slot and spill the register to it.
7793 */
7794 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7795 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7796 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7797 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7798 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7799 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7800 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7801 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7802 }
7803 } while (fHstRegs);
7804 }
7805#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7806
7807 /*
7808     * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
7809     * which would be more difficult anyway due to spanning multiple stack slots and different sizes
7810     * (besides, we only have a limited number of slots at the moment).
7811     *
7812     * However, the shadows need to be flushed out as the guest SIMD register might get corrupted by
7813     * the callee. This asserts that the registers were written back earlier and are not in the dirty state.
7814 */
7815 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7816
7817 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7818 if (fHstRegs)
7819 {
7820 do
7821 {
7822 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7823 fHstRegs &= ~RT_BIT_32(idxHstReg);
7824
7825 /* Fixed reserved and temporary registers don't need saving. */
7826 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7827 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7828 continue;
7829
7830 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7831
7832 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7833 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7834 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7835 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7836 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7837 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7838 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7839 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7840 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7841 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7842 {
7843 case kIemNativeVarKind_Stack:
7844 {
7845 /* Temporarily spill the variable register. */
7846 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7847 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7848 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7849 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7850 if (cbVar == sizeof(RTUINT128U))
7851 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7852 else
7853 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7854 continue;
7855 }
7856
7857 case kIemNativeVarKind_Immediate:
7858 case kIemNativeVarKind_VarRef:
7859 case kIemNativeVarKind_GstRegRef:
7860 /* It is weird to have any of these loaded at this point. */
7861 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7862 continue;
7863
7864 case kIemNativeVarKind_End:
7865 case kIemNativeVarKind_Invalid:
7866 break;
7867 }
7868 AssertFailed();
7869 } while (fHstRegs);
7870 }
7871#endif
7872 return off;
7873}
7874
7875
7876/**
7877 * Emit code to restore volatile registers after a call to a helper.
7878 *
7879 * @returns New @a off value.
7880 * @param pReNative The recompiler state.
7881 * @param off The code buffer position.
7882 * @param fHstRegsNotToSave Set of registers not to save & restore.
7883 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7884 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7885 */
7886DECL_HIDDEN_THROW(uint32_t)
7887iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7888{
7889 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
7890 if (fHstRegs)
7891 {
7892 do
7893 {
7894 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7895 fHstRegs &= ~RT_BIT_32(idxHstReg);
7896
7897 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7898 {
7899 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7900 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7901 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7902 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7903 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7904 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7905 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7906 {
7907 case kIemNativeVarKind_Stack:
7908 {
7909 /* Unspill the variable register. */
7910 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7911 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7912 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7913 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7914 continue;
7915 }
7916
7917 case kIemNativeVarKind_Immediate:
7918 case kIemNativeVarKind_VarRef:
7919 case kIemNativeVarKind_GstRegRef:
7920 /* It is weird to have any of these loaded at this point. */
7921 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7922 continue;
7923
7924 case kIemNativeVarKind_End:
7925 case kIemNativeVarKind_Invalid:
7926 break;
7927 }
7928 AssertFailed();
7929 }
7930 else
7931 {
7932 /*
7933 * Restore from temporary stack slot.
7934 */
7935 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7936 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7937 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7938 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7939
7940 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7941 }
7942 } while (fHstRegs);
7943 }
7944#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7945 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7946 if (fHstRegs)
7947 {
7948 do
7949 {
7950 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7951 fHstRegs &= ~RT_BIT_32(idxHstReg);
7952
7953 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7954 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7955 continue;
7956 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7957
7958 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7959 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7960 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7961 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7962 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7963 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7964 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7965 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7966 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7967 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7968 {
7969 case kIemNativeVarKind_Stack:
7970 {
7971 /* Unspill the variable register. */
7972 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7973 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7974 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7975 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7976
7977 if (cbVar == sizeof(RTUINT128U))
7978 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7979 else
7980 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7981 continue;
7982 }
7983
7984 case kIemNativeVarKind_Immediate:
7985 case kIemNativeVarKind_VarRef:
7986 case kIemNativeVarKind_GstRegRef:
7987 /* It is weird to have any of these loaded at this point. */
7988 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7989 continue;
7990
7991 case kIemNativeVarKind_End:
7992 case kIemNativeVarKind_Invalid:
7993 break;
7994 }
7995 AssertFailed();
7996 } while (fHstRegs);
7997 }
7998#endif
7999 return off;
8000}
8001
8002
8003/**
8004 * Worker that frees the stack slots for variable @a idxVar if any are allocated.
8005 *
8006 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8007 *
8008 * ASSUMES that @a idxVar is valid and unpacked.
8009 */
8010DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8011{
8012 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8013 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8014 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8015 {
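        /* A variable may span several consecutive 8-byte slots; free the whole run it occupies. */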
8016 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8017 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8018 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8019 Assert(cSlots > 0);
8020 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8021 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8022 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8023 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8024 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8025 }
8026 else
8027 Assert(idxStackSlot == UINT8_MAX);
8028}
8029
8030
8031/**
8032 * Worker that frees a single variable.
8033 *
8034 * ASSUMES that @a idxVar is valid and unpacked.
8035 */
8036DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8037{
8038 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8039 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8040 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8041
8042 /* Free the host register first if any assigned. */
8043 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8044#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8045 if ( idxHstReg != UINT8_MAX
8046 && pReNative->Core.aVars[idxVar].fSimdReg)
8047 {
8048 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8049 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8050 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8051 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8052 }
8053 else
8054#endif
8055 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8056 {
8057 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8058 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8059 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8060 }
8061
8062 /* Free argument mapping. */
8063 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8064 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8065 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8066
8067 /* Free the stack slots. */
8068 iemNativeVarFreeStackSlots(pReNative, idxVar);
8069
8070 /* Free the actual variable. */
8071 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8072 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8073}
8074
8075
8076/**
8077 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8078 */
8079DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8080{
8081 while (bmVars != 0)
8082 {
8083 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8084 bmVars &= ~RT_BIT_32(idxVar);
8085
8086#if 1 /** @todo optimize by simplifying this later... */
8087 iemNativeVarFreeOneWorker(pReNative, idxVar);
8088#else
8089 /* Only need to free the host register, the rest is done as bulk updates below. */
8090 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8091 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8092 {
8093 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8094 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8095 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8096 }
8097#endif
8098 }
8099#if 0 /** @todo optimize by simplifying this later... */
8100 pReNative->Core.bmVars = 0;
8101 pReNative->Core.bmStack = 0;
8102 pReNative->Core.u64ArgVars = UINT64_MAX;
8103#endif
8104}
8105
8106
8107
8108/*********************************************************************************************************************************
8109* Emitters for IEM_MC_CALL_CIMPL_XXX *
8110*********************************************************************************************************************************/
8111
8112/**
8113 * Emits code to load a reference to the given guest register into @a idxGprDst.
8114 */
8115DECL_HIDDEN_THROW(uint32_t)
8116iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8117 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8118{
8119#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8120 /** @todo If we ever gonna allow referencing the RIP register we need to update guest value here. */
8121#endif
8122
8123 /*
8124 * Get the offset relative to the CPUMCTX structure.
8125 */
8126 uint32_t offCpumCtx;
8127 switch (enmClass)
8128 {
8129 case kIemNativeGstRegRef_Gpr:
8130 Assert(idxRegInClass < 16);
8131 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8132 break;
8133
8134 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8135 Assert(idxRegInClass < 4);
8136 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8137 break;
8138
8139 case kIemNativeGstRegRef_EFlags:
8140 Assert(idxRegInClass == 0);
8141 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8142 break;
8143
8144 case kIemNativeGstRegRef_MxCsr:
8145 Assert(idxRegInClass == 0);
8146 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8147 break;
8148
8149 case kIemNativeGstRegRef_FpuReg:
8150 Assert(idxRegInClass < 8);
8151 AssertFailed(); /** @todo what kind of indexing? */
8152 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8153 break;
8154
8155 case kIemNativeGstRegRef_MReg:
8156 Assert(idxRegInClass < 8);
8157 AssertFailed(); /** @todo what kind of indexing? */
8158 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8159 break;
8160
8161 case kIemNativeGstRegRef_XReg:
8162 Assert(idxRegInClass < 16);
8163 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8164 break;
8165
8166 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8167 Assert(idxRegInClass == 0);
8168 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8169 break;
8170
8171 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8172 Assert(idxRegInClass == 0);
8173 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8174 break;
8175
8176 default:
8177 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8178 }
8179
8180 /*
8181     * Load the address into the destination register.
8182 */
8183#ifdef RT_ARCH_AMD64
8184 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8185
8186#elif defined(RT_ARCH_ARM64)
8187 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8188 Assert(offCpumCtx < 4096);
8189 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8190
8191#else
8192# error "Port me!"
8193#endif
8194
8195 return off;
8196}
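
/*
 * Usage sketch (illustrative only, not taken from the real MC block emitters):
 * inside an emitter function, a reference to the guest EFLAGS field could be
 * loaded into the first call argument register like this.  Only names that
 * appear elsewhere in this file are used.
 */
#if 0
    off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
                                         kIemNativeGstRegRef_EFlags, 0 /*idxRegInClass*/);
#endif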
8197
8198
8199/**
8200 * Common code for CIMPL and AIMPL calls.
8201 *
 8202 * These are calls that use argument variables and such. They should not be
8203 * confused with internal calls required to implement an MC operation,
8204 * like a TLB load and similar.
8205 *
8206 * Upon return all that is left to do is to load any hidden arguments and
8207 * perform the call. All argument variables are freed.
8208 *
8209 * @returns New code buffer offset; throws VBox status code on error.
8210 * @param pReNative The native recompile state.
8211 * @param off The code buffer offset.
 8212 * @param cArgs The total number of arguments (includes hidden
8213 * count).
8214 * @param cHiddenArgs The number of hidden arguments. The hidden
8215 * arguments must not have any variable declared for
8216 * them, whereas all the regular arguments must
8217 * (tstIEMCheckMc ensures this).
 8218 * @param fFlushPendingWrites Whether to flush pending writes (default true). Even
 8219 * when false, pending writes in call-volatile registers are still flushed.
8220 */
8221DECL_HIDDEN_THROW(uint32_t)
8222iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8223 bool fFlushPendingWrites /*= true*/)
8224{
8225#ifdef VBOX_STRICT
8226 /*
8227 * Assert sanity.
8228 */
8229 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8230 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8231 for (unsigned i = 0; i < cHiddenArgs; i++)
8232 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8233 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8234 {
8235 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8236 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8237 }
8238 iemNativeRegAssertSanity(pReNative);
8239#endif
8240
8241 /* We don't know what the called function makes use of, so flush any pending register writes. */
8242 RT_NOREF(fFlushPendingWrites);
8243#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8244 if (fFlushPendingWrites)
8245#endif
8246 off = iemNativeRegFlushPendingWrites(pReNative, off);
8247
8248 /*
8249 * Before we do anything else, go over variables that are referenced and
8250 * make sure they are not in a register.
8251 */
8252 uint32_t bmVars = pReNative->Core.bmVars;
8253 if (bmVars)
8254 {
8255 do
8256 {
8257 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8258 bmVars &= ~RT_BIT_32(idxVar);
8259
8260 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8261 {
8262 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8263#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8264 if ( idxRegOld != UINT8_MAX
8265 && pReNative->Core.aVars[idxVar].fSimdReg)
8266 {
8267 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8268 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8269
8270 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8271 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8272 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8273 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8274 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8275 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8276 else
8277 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8278
8279 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8280 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8281
8282 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8283 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8284 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8285 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8286 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8287 }
8288 else
8289#endif
8290 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8291 {
8292 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8293 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8294 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8295 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8296 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8297
8298 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8299 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8300 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8301 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8302 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8303 }
8304 }
8305 } while (bmVars != 0);
8306#if 0 //def VBOX_STRICT
8307 iemNativeRegAssertSanity(pReNative);
8308#endif
8309 }
8310
8311 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8312
8313#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8314 /*
 8315 * As the very first step, go over the host registers that will be used for arguments
 8316 * and make sure they don't shadow anything which needs writing back first.
8317 */
8318 for (uint32_t i = 0; i < cRegArgs; i++)
8319 {
8320 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8321
8322 /* Writeback any dirty guest shadows before using this register. */
8323 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8324 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8325 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8326 }
8327#endif
8328
8329 /*
8330 * First, go over the host registers that will be used for arguments and make
8331 * sure they either hold the desired argument or are free.
8332 */
8333 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8334 {
8335 for (uint32_t i = 0; i < cRegArgs; i++)
8336 {
8337 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8338 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8339 {
8340 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8341 {
8342 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8343 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8344 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8345 Assert(pVar->idxReg == idxArgReg);
8346 uint8_t const uArgNo = pVar->uArgNo;
8347 if (uArgNo == i)
 8348 { /* perfect */ }
8349 /* The variable allocator logic should make sure this is impossible,
8350 except for when the return register is used as a parameter (ARM,
8351 but not x86). */
8352#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8353 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8354 {
8355# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8356# error "Implement this"
8357# endif
8358 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8359 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8360 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8361 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8362 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8363 }
8364#endif
8365 else
8366 {
8367 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8368
8369 if (pVar->enmKind == kIemNativeVarKind_Stack)
8370 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8371 else
8372 {
8373 /* just free it, can be reloaded if used again */
8374 pVar->idxReg = UINT8_MAX;
8375 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8376 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8377 }
8378 }
8379 }
8380 else
8381 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8382 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8383 }
8384 }
8385#if 0 //def VBOX_STRICT
8386 iemNativeRegAssertSanity(pReNative);
8387#endif
8388 }
8389
8390 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8391
8392#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8393 /*
8394 * If there are any stack arguments, make sure they are in their place as well.
8395 *
 8396 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
 8397 * the caller) will be loading it later and it must be free (see the first loop).
8398 */
8399 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8400 {
8401 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8402 {
8403 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8404 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8405 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8406 {
8407 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8408 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8409 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8410 pVar->idxReg = UINT8_MAX;
8411 }
8412 else
8413 {
8414 /* Use ARG0 as temp for stuff we need registers for. */
8415 switch (pVar->enmKind)
8416 {
8417 case kIemNativeVarKind_Stack:
8418 {
8419 uint8_t const idxStackSlot = pVar->idxStackSlot;
8420 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8421 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8422 iemNativeStackCalcBpDisp(idxStackSlot));
8423 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8424 continue;
8425 }
8426
8427 case kIemNativeVarKind_Immediate:
8428 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8429 continue;
8430
8431 case kIemNativeVarKind_VarRef:
8432 {
8433 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8434 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8435 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8436 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8437 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8438# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8439 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8440 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8441 if ( fSimdReg
8442 && idxRegOther != UINT8_MAX)
8443 {
8444 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8445 if (cbVar == sizeof(RTUINT128U))
8446 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8447 else
8448 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8449 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8450 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8451 }
8452 else
8453# endif
8454 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8455 {
8456 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8457 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8458 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8459 }
8460 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8461 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8462 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8463 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8464 continue;
8465 }
8466
8467 case kIemNativeVarKind_GstRegRef:
8468 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8469 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8470 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8471 continue;
8472
8473 case kIemNativeVarKind_Invalid:
8474 case kIemNativeVarKind_End:
8475 break;
8476 }
8477 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8478 }
8479 }
8480# if 0 //def VBOX_STRICT
8481 iemNativeRegAssertSanity(pReNative);
8482# endif
8483 }
8484#else
8485 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8486#endif
8487
8488 /*
8489 * Make sure the argument variables are loaded into their respective registers.
8490 *
8491 * We can optimize this by ASSUMING that any register allocations are for
 8492 * registers that have already been loaded and are ready. The previous step
8493 * saw to that.
8494 */
8495 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8496 {
8497 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8498 {
8499 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8500 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8501 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8502 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8503 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8504 else
8505 {
8506 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8507 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8508 {
8509 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8510 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8511 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8512 | RT_BIT_32(idxArgReg);
8513 pVar->idxReg = idxArgReg;
8514 }
8515 else
8516 {
8517 /* Use ARG0 as temp for stuff we need registers for. */
8518 switch (pVar->enmKind)
8519 {
8520 case kIemNativeVarKind_Stack:
8521 {
8522 uint8_t const idxStackSlot = pVar->idxStackSlot;
8523 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8524 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8525 continue;
8526 }
8527
8528 case kIemNativeVarKind_Immediate:
8529 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8530 continue;
8531
8532 case kIemNativeVarKind_VarRef:
8533 {
8534 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8535 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8536 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8537 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8538 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8539 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8540#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8541 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8542 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8543 if ( fSimdReg
8544 && idxRegOther != UINT8_MAX)
8545 {
8546 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8547 if (cbVar == sizeof(RTUINT128U))
8548 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8549 else
8550 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8551 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8552 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8553 }
8554 else
8555#endif
8556 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8557 {
8558 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8559 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8560 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8561 }
8562 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8563 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8564 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8565 continue;
8566 }
8567
8568 case kIemNativeVarKind_GstRegRef:
8569 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8570 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8571 continue;
8572
8573 case kIemNativeVarKind_Invalid:
8574 case kIemNativeVarKind_End:
8575 break;
8576 }
8577 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8578 }
8579 }
8580 }
8581#if 0 //def VBOX_STRICT
8582 iemNativeRegAssertSanity(pReNative);
8583#endif
8584 }
8585#ifdef VBOX_STRICT
8586 else
8587 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8588 {
8589 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8590 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8591 }
8592#endif
8593
8594 /*
8595 * Free all argument variables (simplified).
8596 * Their lifetime always expires with the call they are for.
8597 */
8598 /** @todo Make the python script check that arguments aren't used after
8599 * IEM_MC_CALL_XXXX. */
 8600 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
 8601 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
 8602 * an argument value. There is also some FPU stuff. */
8603 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8604 {
8605 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8606 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8607
8608 /* no need to free registers: */
8609 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8610 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8611 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8612 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8613 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8614 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8615
8616 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8617 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8618 iemNativeVarFreeStackSlots(pReNative, idxVar);
8619 }
8620 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8621
8622 /*
8623 * Flush volatile registers as we make the call.
8624 */
8625 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8626
8627 return off;
8628}
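
/*
 * Call-site sketch (illustrative only): the typical pattern implied by the
 * doxygen comment above - emit the common argument shuffling, then load the
 * hidden argument(s) and perform the call.  Here the single hidden argument is
 * assumed to be pVCpu, pfnHelper is a hypothetical helper address, and
 * iemNativeEmitCallImm is assumed to be provided by the emitter headers.
 */
#if 0
    off = iemNativeEmitCallCommon(pReNative, off, 2 /*cArgs*/, 1 /*cHiddenArgs*/);
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
#endif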
8629
8630
8631
8632/*********************************************************************************************************************************
8633* TLB Lookup. *
8634*********************************************************************************************************************************/
8635
8636/**
8637 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8638 */
8639DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint64_t uSegAndSizeAndAccessAndDisp)
8640{
8641 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccessAndDisp);
8642 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccessAndDisp);
8643 uint32_t const fAccess = (uint32_t)uSegAndSizeAndAccessAndDisp >> 16;
8644 uint8_t const offDisp = RT_BYTE5(uSegAndSizeAndAccessAndDisp);
8645 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64+%#x LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, offDisp, cbMem, fAccess, uResult));
8646
8647 /* Do the lookup manually. */
8648 RTGCPTR const GCPtrFlat = (iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base) + offDisp;
8649 uint64_t const uTagNoRev = IEMTLB_CALC_TAG_NO_REV(GCPtrFlat);
8650 PCIEMTLBENTRY pTlbe = IEMTLB_TAG_TO_EVEN_ENTRY(&pVCpu->iem.s.DataTlb, uTagNoRev);
8651 if (RT_LIKELY( pTlbe->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevision)
8652 || (pTlbe = pTlbe + 1)->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevisionGlobal)))
8653 {
8654 /*
8655 * Check TLB page table level access flags.
8656 */
8657 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8658 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8659 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8660 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8661 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8662 | IEMTLBE_F_PG_UNASSIGNED
8663 | IEMTLBE_F_PT_NO_ACCESSED
8664 | fNoWriteNoDirty | fNoUser);
8665 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8666 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8667 {
8668 /*
8669 * Return the address.
8670 */
8671 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8672 if ((uintptr_t)pbAddr == uResult)
8673 return;
8674 RT_NOREF(cbMem);
8675 AssertFailed();
8676 }
8677 else
8678 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8679 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8680 }
8681 else
8682 AssertFailed();
8683 RT_BREAKPOINT();
8684}
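
/*
 * Packing sketch (illustrative only), mirroring the decoding at the top of
 * iemNativeHlpCheckTlbLookup: byte 0 holds the segment register index, byte 1
 * the access size, bits 16 thru 31 the access flags, and byte 4 the displacement.
 * This assumes fAccess fits into 16 bits; the emitter side that actually builds
 * the value lives in IEMN8veRecompilerTlbLookup.h and may differ in detail.
 */
#if 0
    uint64_t const uSegAndSizeAndAccessAndDisp = (uint64_t)iSegReg
                                               | ((uint64_t)cbMem   <<  8)
                                               | ((uint64_t)fAccess << 16)
                                               | ((uint64_t)offDisp << 32);
#endif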
8685
8686/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8687
8688
8689
8690/*********************************************************************************************************************************
8691* Recompiler Core. *
8692*********************************************************************************************************************************/
8693
8694/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8695static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8696{
8697 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8698 pDis->cbCachedInstr += cbMaxRead;
8699 RT_NOREF(cbMinRead);
8700 return VERR_NO_DATA;
8701}
8702
8703
8704DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8705{
8706 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8707 {
8708#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8709 ENTRY(fLocalForcedActions),
8710 ENTRY(iem.s.rcPassUp),
8711 ENTRY(iem.s.fExec),
8712 ENTRY(iem.s.pbInstrBuf),
8713 ENTRY(iem.s.uInstrBufPc),
8714 ENTRY(iem.s.GCPhysInstrBuf),
8715 ENTRY(iem.s.cbInstrBufTotal),
8716 ENTRY(iem.s.idxTbCurInstr),
8717 ENTRY(iem.s.fSkippingEFlags),
8718#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
8719 ENTRY(iem.s.uPcUpdatingDebug),
8720#endif
8721#ifdef VBOX_WITH_STATISTICS
8722 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8723 ENTRY(iem.s.StatNativeTlbHitsForStore),
8724 ENTRY(iem.s.StatNativeTlbHitsForStack),
8725 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8726 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8727 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8728 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8729 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8730#endif
8731 ENTRY(iem.s.DataTlb.uTlbRevision),
8732 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8733 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8734 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8735 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8736 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8737 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8738 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8739 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8740 ENTRY(iem.s.DataTlb.aEntries),
8741 ENTRY(iem.s.CodeTlb.uTlbRevision),
8742 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8743 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8744 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8745 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8746 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8747 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8748 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8749 ENTRY(iem.s.CodeTlb.aEntries),
8750 ENTRY(pVMR3),
8751 ENTRY(cpum.GstCtx.rax),
8752 ENTRY(cpum.GstCtx.ah),
8753 ENTRY(cpum.GstCtx.rcx),
8754 ENTRY(cpum.GstCtx.ch),
8755 ENTRY(cpum.GstCtx.rdx),
8756 ENTRY(cpum.GstCtx.dh),
8757 ENTRY(cpum.GstCtx.rbx),
8758 ENTRY(cpum.GstCtx.bh),
8759 ENTRY(cpum.GstCtx.rsp),
8760 ENTRY(cpum.GstCtx.rbp),
8761 ENTRY(cpum.GstCtx.rsi),
8762 ENTRY(cpum.GstCtx.rdi),
8763 ENTRY(cpum.GstCtx.r8),
8764 ENTRY(cpum.GstCtx.r9),
8765 ENTRY(cpum.GstCtx.r10),
8766 ENTRY(cpum.GstCtx.r11),
8767 ENTRY(cpum.GstCtx.r12),
8768 ENTRY(cpum.GstCtx.r13),
8769 ENTRY(cpum.GstCtx.r14),
8770 ENTRY(cpum.GstCtx.r15),
8771 ENTRY(cpum.GstCtx.es.Sel),
8772 ENTRY(cpum.GstCtx.es.u64Base),
8773 ENTRY(cpum.GstCtx.es.u32Limit),
8774 ENTRY(cpum.GstCtx.es.Attr),
8775 ENTRY(cpum.GstCtx.cs.Sel),
8776 ENTRY(cpum.GstCtx.cs.u64Base),
8777 ENTRY(cpum.GstCtx.cs.u32Limit),
8778 ENTRY(cpum.GstCtx.cs.Attr),
8779 ENTRY(cpum.GstCtx.ss.Sel),
8780 ENTRY(cpum.GstCtx.ss.u64Base),
8781 ENTRY(cpum.GstCtx.ss.u32Limit),
8782 ENTRY(cpum.GstCtx.ss.Attr),
8783 ENTRY(cpum.GstCtx.ds.Sel),
8784 ENTRY(cpum.GstCtx.ds.u64Base),
8785 ENTRY(cpum.GstCtx.ds.u32Limit),
8786 ENTRY(cpum.GstCtx.ds.Attr),
8787 ENTRY(cpum.GstCtx.fs.Sel),
8788 ENTRY(cpum.GstCtx.fs.u64Base),
8789 ENTRY(cpum.GstCtx.fs.u32Limit),
8790 ENTRY(cpum.GstCtx.fs.Attr),
8791 ENTRY(cpum.GstCtx.gs.Sel),
8792 ENTRY(cpum.GstCtx.gs.u64Base),
8793 ENTRY(cpum.GstCtx.gs.u32Limit),
8794 ENTRY(cpum.GstCtx.gs.Attr),
8795 ENTRY(cpum.GstCtx.rip),
8796 ENTRY(cpum.GstCtx.eflags),
8797 ENTRY(cpum.GstCtx.uRipInhibitInt),
8798 ENTRY(cpum.GstCtx.cr0),
8799 ENTRY(cpum.GstCtx.cr4),
8800 ENTRY(cpum.GstCtx.aXcr[0]),
8801 ENTRY(cpum.GstCtx.aXcr[1]),
8802#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8803 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8804 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8805 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8806 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8807 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8808 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8809 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8810 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8811 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8812 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8813 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8814 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8815 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8816 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8817 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8818 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8819 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8820 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8821 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8822 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8823 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8824 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8825 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8826 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8827 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8828 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8829 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8830 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8831 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8832 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8833 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8834 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8835 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8836#endif
8837#undef ENTRY
8838 };
8839#ifdef VBOX_STRICT
8840 static bool s_fOrderChecked = false;
8841 if (!s_fOrderChecked)
8842 {
8843 s_fOrderChecked = true;
8844 uint32_t offPrev = s_aMembers[0].off;
8845 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8846 {
8847 Assert(s_aMembers[i].off > offPrev);
8848 offPrev = s_aMembers[i].off;
8849 }
8850 }
8851#endif
8852
8853 /*
8854 * Binary lookup.
8855 */
8856 unsigned iStart = 0;
8857 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8858 for (;;)
8859 {
8860 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8861 uint32_t const offCur = s_aMembers[iCur].off;
8862 if (off < offCur)
8863 {
8864 if (iCur != iStart)
8865 iEnd = iCur;
8866 else
8867 break;
8868 }
8869 else if (off > offCur)
8870 {
8871 if (iCur + 1 < iEnd)
8872 iStart = iCur + 1;
8873 else
8874 break;
8875 }
8876 else
8877 return s_aMembers[iCur].pszName;
8878 }
8879#ifdef VBOX_WITH_STATISTICS
8880 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8881 return "iem.s.acThreadedFuncStats[iFn]";
8882#endif
8883 return NULL;
8884}
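
/*
 * Lookup sketch (illustrative only): resolving a VMCPU byte offset back to the
 * member name recorded in the ENTRY() table above, just like the disassembler
 * annotations below do for pVCpu based memory operands.
 */
#if 0
    const char * const pszMember = iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip));
    Assert(pszMember && !strcmp(pszMember, "cpum.GstCtx.rip"));
#endif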
8885
8886
8887/**
8888 * Translates a label to a name.
8889 */
8890static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode /*= false*/)
8891{
8892 switch (enmLabel)
8893 {
8894#define STR_CASE_CMN(a_Label) case kIemNativeLabelType_ ## a_Label: return fCommonCode ? "Chunk_" #a_Label : #a_Label;
8895 STR_CASE_CMN(Invalid);
8896 STR_CASE_CMN(RaiseDe);
8897 STR_CASE_CMN(RaiseUd);
8898 STR_CASE_CMN(RaiseSseRelated);
8899 STR_CASE_CMN(RaiseAvxRelated);
8900 STR_CASE_CMN(RaiseSseAvxFpRelated);
8901 STR_CASE_CMN(RaiseNm);
8902 STR_CASE_CMN(RaiseGp0);
8903 STR_CASE_CMN(RaiseMf);
8904 STR_CASE_CMN(RaiseXf);
8905 STR_CASE_CMN(ObsoleteTb);
8906 STR_CASE_CMN(NeedCsLimChecking);
8907 STR_CASE_CMN(CheckBranchMiss);
8908 STR_CASE_CMN(ReturnSuccess);
8909 STR_CASE_CMN(ReturnBreak);
8910 STR_CASE_CMN(ReturnBreakFF);
8911 STR_CASE_CMN(ReturnWithFlags);
8912 STR_CASE_CMN(ReturnBreakViaLookup);
8913 STR_CASE_CMN(ReturnBreakViaLookupWithIrq);
8914 STR_CASE_CMN(ReturnBreakViaLookupWithTlb);
8915 STR_CASE_CMN(ReturnBreakViaLookupWithTlbAndIrq);
8916 STR_CASE_CMN(NonZeroRetOrPassUp);
8917#undef STR_CASE_CMN
8918#define STR_CASE_LBL(a_Label) case kIemNativeLabelType_ ## a_Label: return #a_Label;
8919 STR_CASE_LBL(LoopJumpTarget);
8920 STR_CASE_LBL(If);
8921 STR_CASE_LBL(Else);
8922 STR_CASE_LBL(Endif);
8923 STR_CASE_LBL(CheckIrq);
8924 STR_CASE_LBL(TlbLookup);
8925 STR_CASE_LBL(TlbMiss);
8926 STR_CASE_LBL(TlbDone);
8927 case kIemNativeLabelType_End: break;
8928 }
8929 return NULL;
8930}
8931
8932
8933/** Info for the symbols resolver used when disassembling. */
8934typedef struct IEMNATIVDISASMSYMCTX
8935{
8936 PVMCPU pVCpu;
8937 PCIEMTB pTb;
8938 PCIEMNATIVEPERCHUNKCTX pCtx;
8939#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8940 PCIEMTBDBG pDbgInfo;
8941#endif
8942} IEMNATIVDISASMSYMCTX;
8943typedef IEMNATIVDISASMSYMCTX *PIEMNATIVDISASMSYMCTX;
8944
8945
8946/**
8947 * Resolve address to symbol, if we can.
8948 */
8949static const char *iemNativeDisasmGetSymbol(PIEMNATIVDISASMSYMCTX pSymCtx, uintptr_t uAddress, char *pszBuf, size_t cbBuf)
8950{
8951 PCIEMTB const pTb = pSymCtx->pTb;
8952 uintptr_t const offNative = (uAddress - (uintptr_t)pTb->Native.paInstructions) / sizeof(IEMNATIVEINSTR);
8953 if (offNative <= pTb->Native.cInstructions)
8954 {
8955#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8956 /*
8957 * Scan debug info for a matching label.
8958 * Since the debug info should be 100% linear, we can do a binary search here.
8959 */
8960 PCIEMTBDBG const pDbgInfo = pSymCtx->pDbgInfo;
8961 if (pDbgInfo)
8962 {
8963 uint32_t const cEntries = pDbgInfo->cEntries;
8964 uint32_t idxEnd = cEntries;
8965 uint32_t idxStart = 0;
8966 for (;;)
8967 {
8968 /* Find a NativeOffset record close to the midpoint. */
8969 uint32_t idx = idxStart + (idxEnd - idxStart) / 2;
8970 while (idx > idxStart && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
8971 idx--;
8972 if (pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
8973 {
8974 idx = idxStart + (idxEnd - idxStart) / 2 + 1;
8975 while (idx < idxEnd && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
8976 idx++;
8977 if (idx >= idxEnd)
8978 break;
8979 }
8980
8981 /* Do the binary searching thing. */
8982 if (offNative < pDbgInfo->aEntries[idx].NativeOffset.offNative)
8983 {
8984 if (idx > idxStart)
8985 idxEnd = idx;
8986 else
8987 break;
8988 }
8989 else if (offNative > pDbgInfo->aEntries[idx].NativeOffset.offNative)
8990 {
8991 idx += 1;
8992 if (idx < idxEnd)
8993 idxStart = idx;
8994 else
8995 break;
8996 }
8997 else
8998 {
8999 /* Got a matching offset, scan forward till we hit a label, but
9000 stop when the native offset changes. */
9001 while (++idx < cEntries)
9002 switch (pDbgInfo->aEntries[idx].Gen.uType)
9003 {
9004 case kIemTbDbgEntryType_Label:
9005 {
9006 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)pDbgInfo->aEntries[idx].Label.enmLabel;
9007 const char * const pszName = iemNativeGetLabelName(enmLabel);
9008 if (enmLabel < kIemNativeLabelType_FirstWithMultipleInstances)
9009 return pszName;
9010 RTStrPrintf(pszBuf, cbBuf, "%s_%u", pszName, pDbgInfo->aEntries[idx].Label.uData);
9011 return pszBuf;
9012 }
9013
9014 case kIemTbDbgEntryType_NativeOffset:
9015 if (pDbgInfo->aEntries[idx].NativeOffset.offNative != offNative)
9016 return NULL;
9017 break;
9018 }
9019 break;
9020 }
9021 }
9022 }
9023#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9024 }
9025 else
9026 {
9027 PCIEMNATIVEPERCHUNKCTX const pChunkCtx = pSymCtx->pCtx;
9028 if (pChunkCtx)
9029 for (uint32_t i = 1; i < RT_ELEMENTS(pChunkCtx->apExitLabels); i++)
9030 if ((PIEMNATIVEINSTR)uAddress == pChunkCtx->apExitLabels[i])
9031 return iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true /*fCommonCode*/);
9032 }
9033 RT_NOREF(pszBuf, cbBuf);
9034 return NULL;
9035}
9036
9037#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9038
9039/**
9040 * @callback_method_impl{FNDISGETSYMBOL}
9041 */
9042static DECLCALLBACK(int) iemNativeDisasmGetSymbolCb(PCDISSTATE pDis, uint32_t u32Sel, RTUINTPTR uAddress,
9043 char *pszBuf, size_t cchBuf, RTINTPTR *poff, void *pvUser)
9044{
9045 const char * const pszSym = iemNativeDisasmGetSymbol((PIEMNATIVDISASMSYMCTX)pvUser, uAddress, pszBuf, cchBuf);
9046 if (pszSym)
9047 {
9048 *poff = 0;
9049 if (pszSym != pszBuf)
9050 return RTStrCopy(pszBuf, cchBuf, pszSym);
9051 return VINF_SUCCESS;
9052 }
9053 RT_NOREF(pDis, u32Sel);
9054 return VERR_SYMBOL_NOT_FOUND;
9055}
9056
9057#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9058
9059/**
9060 * Annotates an instruction decoded by the capstone disassembler.
9061 */
9062static const char *
9063iemNativeDisasmAnnotateCapstone(PIEMNATIVDISASMSYMCTX pSymCtx, cs_insn const *pInstr, char *pszBuf, size_t cchBuf)
9064{
9065# if defined(RT_ARCH_ARM64)
9066 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9067 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9068 {
 9069 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9070 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9071 char const *psz = strchr(pInstr->op_str, '[');
9072 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9073 {
 9074 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9075 int32_t off = -1;
9076 psz += 4;
9077 if (*psz == ']')
9078 off = 0;
9079 else if (*psz == ',')
9080 {
9081 psz = RTStrStripL(psz + 1);
9082 if (*psz == '#')
9083 off = RTStrToInt32(&psz[1]);
9084 /** @todo deal with index registers and LSL as well... */
9085 }
9086 if (off >= 0)
9087 return iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9088 }
9089 }
9090 else if (pInstr->id == ARM64_INS_B || pInstr->id == ARM64_INS_BL)
9091 {
9092 const char *pszAddr = strchr(pInstr->op_str, '#');
9093 if (pszAddr)
9094 {
9095 uint64_t uAddr = RTStrToUInt64(pszAddr + 1);
9096 if (uAddr != 0)
9097 return iemNativeDisasmGetSymbol(pSymCtx, uAddr, pszBuf, cchBuf);
9098 }
9099 }
9100# endif
9101 RT_NOREF(pSymCtx, pInstr, pszBuf, cchBuf);
9102 return NULL;
9103}
9104#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9105
9106
9107DECLHIDDEN(void) iemNativeDisassembleTb(PVMCPU pVCpu, PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9108{
9109 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9110#if defined(RT_ARCH_AMD64)
9111 static const char * const a_apszMarkers[] =
9112 {
9113 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9114 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9115 };
9116#endif
9117
9118 char szDisBuf[512];
9119 DISSTATE Dis;
9120 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9121 uint32_t const cNative = pTb->Native.cInstructions;
9122 uint32_t offNative = 0;
9123#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9124 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9125#endif
9126 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9127 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9128 : DISCPUMODE_64BIT;
9129#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9130 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb), pDbgInfo };
9131#else
9132 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb) };
9133#endif
9134#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9135 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9136#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9137 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9138#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9139# error "Port me"
9140#else
9141 csh hDisasm = ~(size_t)0;
9142# if defined(RT_ARCH_AMD64)
9143 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9144# elif defined(RT_ARCH_ARM64)
9145 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9146# else
9147# error "Port me"
9148# endif
9149 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9150
9151 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9152 //Assert(rcCs == CS_ERR_OK);
9153#endif
9154
9155 /*
9156 * Print TB info.
9157 */
9158 pHlp->pfnPrintf(pHlp,
9159 "pTb=%p: GCPhysPc=%RGp (%%%RGv) cInstructions=%u LB %#x cRanges=%u\n"
9160 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9161 pTb, pTb->GCPhysPc,
9162#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9163 pTb->pDbgInfo ? pTb->pDbgInfo->FlatPc : RTGCPTR_MAX,
9164#else
9165 pTb->FlatPc,
9166#endif
9167 pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9168 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9169#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9170 if (pDbgInfo && pDbgInfo->cEntries > 1)
9171 {
9172 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9173
9174 /*
 9175 * This disassembly is driven by the debug info which follows the native
 9176 * code and indicates where the next guest instruction starts, where the
 9177 * labels are, and such things.
9178 */
9179 uint32_t idxThreadedCall = 0;
9180 uint32_t idxGuestInstr = 0;
9181 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9182 uint8_t idxRange = UINT8_MAX;
9183 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9184 uint32_t offRange = 0;
9185 uint32_t offOpcodes = 0;
9186 uint32_t const cbOpcodes = pTb->cbOpcodes;
9187 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9188 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9189 uint32_t iDbgEntry = 1;
9190 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9191
9192 while (offNative < cNative)
9193 {
9194 /* If we're at or have passed the point where the next chunk of debug
9195 info starts, process it. */
9196 if (offDbgNativeNext <= offNative)
9197 {
9198 offDbgNativeNext = UINT32_MAX;
9199 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9200 {
9201 switch ((IEMTBDBGENTRYTYPE)pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9202 {
9203 case kIemTbDbgEntryType_GuestInstruction:
9204 {
9205 /* Did the exec flag change? */
9206 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9207 {
9208 pHlp->pfnPrintf(pHlp,
9209 " fExec change %#08x -> %#08x %s\n",
9210 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9211 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9212 szDisBuf, sizeof(szDisBuf)));
9213 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9214 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9215 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9216 : DISCPUMODE_64BIT;
9217 }
9218
 9219 /* New opcode range? We need to fend off a spurious debug info entry here for cases
9220 where the compilation was aborted before the opcode was recorded and the actual
9221 instruction was translated to a threaded call. This may happen when we run out
9222 of ranges, or when some complicated interrupts/FFs are found to be pending or
9223 similar. So, we just deal with it here rather than in the compiler code as it
9224 is a lot simpler to do here. */
9225 if ( idxRange == UINT8_MAX
9226 || idxRange >= cRanges
9227 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9228 {
9229 idxRange += 1;
9230 if (idxRange < cRanges)
9231 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9232 else
9233 continue;
9234 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9235 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9236 + (pTb->aRanges[idxRange].idxPhysPage == 0
9237 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9238 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9239 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9240 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9241 pTb->aRanges[idxRange].idxPhysPage);
9242 GCPhysPc += offRange;
9243 }
9244
9245 /* Disassemble the instruction. */
9246 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9247 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9248 uint32_t cbInstr = 1;
9249 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9250 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9251 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9252 if (RT_SUCCESS(rc))
9253 {
9254 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9255 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9256 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9257 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9258
9259 static unsigned const s_offMarker = 55;
9260 static char const s_szMarker[] = " ; <--- guest";
9261 if (cch < s_offMarker)
9262 {
9263 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9264 cch = s_offMarker;
9265 }
9266 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9267 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9268
9269 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %s #%u\n", GCPhysPc, szDisBuf, idxGuestInstr);
9270 }
9271 else
9272 {
9273 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9274 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9275 cbInstr = 1;
9276 }
9277 idxGuestInstr++;
9278 GCPhysPc += cbInstr;
9279 offOpcodes += cbInstr;
9280 offRange += cbInstr;
9281 continue;
9282 }
9283
9284 case kIemTbDbgEntryType_ThreadedCall:
9285 pHlp->pfnPrintf(pHlp,
9286 " Call #%u to %s (%u args) - %s\n",
9287 idxThreadedCall,
9288 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9289 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9290 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9291 idxThreadedCall++;
9292 continue;
9293
9294 case kIemTbDbgEntryType_GuestRegShadowing:
9295 {
9296 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9297 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9298 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9299 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9300 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9301 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9302 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9303 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9304 else
9305 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9306 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9307 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9308 continue;
9309 }
9310
9311# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9312 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9313 {
9314 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9315 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9316 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9317 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9318 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9319 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9320 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9321 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9322 else
9323 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9324 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9325 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9326 continue;
9327 }
9328# endif
9329
9330 case kIemTbDbgEntryType_Label:
9331 {
9332 const char *pszName = iemNativeGetLabelName((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel);
9333 if (pDbgInfo->aEntries[iDbgEntry].Label.enmLabel >= kIemNativeLabelType_FirstWithMultipleInstances)
9334 {
9335 const char *pszComment = pDbgInfo->aEntries[iDbgEntry].Label.enmLabel == kIemNativeLabelType_Else
9336 ? " ; regs state restored pre-if-block" : "";
9337 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9338 }
9339 else
9340 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9341 continue;
9342 }
9343
9344 case kIemTbDbgEntryType_NativeOffset:
9345 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9346 Assert(offDbgNativeNext >= offNative);
9347 break;
9348
9349# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9350 case kIemTbDbgEntryType_DelayedPcUpdate:
9351 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9352 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9353 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9354 continue;
9355# endif
9356
9357# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9358 case kIemTbDbgEntryType_GuestRegDirty:
9359 {
9360 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9361 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9362 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9363 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9364 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9365 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9366 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9367 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9368 pszGstReg, pszHstReg);
9369 continue;
9370 }
9371
9372 case kIemTbDbgEntryType_GuestRegWriteback:
 9373 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX64)\n",
9374 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9375 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9376 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9377 continue;
9378# endif
9379
9380# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9381 case kIemTbDbgEntryType_PostponedEFlagsCalc:
9382 {
9383 const char *pszOp = "!unknown!";
9384 switch ((IEMNATIVE_POSTPONED_EFL_OP_T)pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.enmOp)
9385 {
9386 case kIemNativePostponedEflOp_Logical: pszOp = "logical"; break;
9387 case kIemNativePostponedEflOp_Invalid: break;
9388 case kIemNativePostponedEflOp_End: break;
9389 }
9390 pHlp->pfnPrintf(pHlp, " Postponed EFLAGS calc #%u: %s %u bits\n",
9391 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.idxEmit, pszOp,
9392 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.cOpBits);
9393 continue;
9394 }
9395# endif
9396 default:
9397 AssertFailed();
9398 continue;
9399 }
9400 /* Break out of the loop at kIemTbDbgEntryType_NativeOffset. */
9401 iDbgEntry++;
9402 break;
9403 }
9404 }
9405
9406 /*
9407 * Disassemble the next native instruction.
9408 */
9409 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9410# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9411 uint32_t cbInstr = sizeof(paNative[0]);
9412 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9413 if (RT_SUCCESS(rc))
9414 {
9415# if defined(RT_ARCH_AMD64)
9416 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9417 {
9418 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9419 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9420 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9421 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9422 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9423 uInfo & 0x8000 ? "recompiled" : "todo");
9424 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9425 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9426 else
9427 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9428 }
9429 else
9430# endif
9431 {
9432 const char *pszAnnotation = NULL;
9433# ifdef RT_ARCH_AMD64
9434 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9435 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9436 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9437 iemNativeDisasmGetSymbolCb, &SymCtx);
9438 PCDISOPPARAM pMemOp;
9439 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[0].fUse))
9440 pMemOp = &Dis.aParams[0];
9441 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[1].fUse))
9442 pMemOp = &Dis.aParams[1];
9443 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[2].fUse))
9444 pMemOp = &Dis.aParams[2];
9445 else
9446 pMemOp = NULL;
9447 if ( pMemOp
9448 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9449 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9450 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9451 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9452
9453# elif defined(RT_ARCH_ARM64)
9454 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9455 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9456 iemNativeDisasmGetSymbolCb, &SymCtx);
9457# else
9458# error "Port me"
9459# endif
9460 if (pszAnnotation)
9461 {
9462 static unsigned const s_offAnnotation = 55;
9463 size_t const cchAnnotation = strlen(pszAnnotation);
9464 size_t cchDis = strlen(szDisBuf);
9465 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9466 {
9467 if (cchDis < s_offAnnotation)
9468 {
9469 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9470 cchDis = s_offAnnotation;
9471 }
9472 szDisBuf[cchDis++] = ' ';
9473 szDisBuf[cchDis++] = ';';
9474 szDisBuf[cchDis++] = ' ';
9475 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9476 }
9477 }
9478 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9479 }
9480 }
9481 else
9482 {
9483# if defined(RT_ARCH_AMD64)
9484 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9485 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9486# elif defined(RT_ARCH_ARM64)
9487 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9488# else
9489# error "Port me"
9490# endif
9491 cbInstr = sizeof(paNative[0]);
9492 }
9493 offNative += cbInstr / sizeof(paNative[0]);
9494
9495# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9496 cs_insn *pInstr;
9497 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9498 (uintptr_t)pNativeCur, 1, &pInstr);
9499 if (cInstrs > 0)
9500 {
9501 Assert(cInstrs == 1);
9502 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9503 size_t const cchOp = strlen(pInstr->op_str);
9504# if defined(RT_ARCH_AMD64)
9505 if (pszAnnotation)
9506 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9507 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9508 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9509 else
9510 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9511 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9512
9513# else
9514 if (pszAnnotation)
9515 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9516 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9517 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9518 else
9519 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9520 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9521# endif
9522 offNative += pInstr->size / sizeof(*pNativeCur);
9523 cs_free(pInstr, cInstrs);
9524 }
9525 else
9526 {
9527# if defined(RT_ARCH_AMD64)
9528 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
 9529 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9530# else
9531 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9532# endif
9533 offNative++;
9534 }
9535# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9536 }
9537 }
9538 else
9539#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9540 {
9541 /*
9542 * No debug info, just disassemble the x86 code and then the native code.
9543 *
9544 * First the guest code:
9545 */
9546 for (unsigned i = 0; i < pTb->cRanges; i++)
9547 {
9548 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9549 + (pTb->aRanges[i].idxPhysPage == 0
9550 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9551 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9552 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9553 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9554 unsigned off = pTb->aRanges[i].offOpcodes;
9555 /** @todo this ain't working when crossing pages! */
9556 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9557 while (off < cbOpcodes)
9558 {
9559 uint32_t cbInstr = 1;
9560 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9561 &pTb->pabOpcodes[off], cbOpcodes - off,
9562 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9563 if (RT_SUCCESS(rc))
9564 {
9565 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9566 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9567 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9568 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9569 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9570 GCPhysPc += cbInstr;
9571 off += cbInstr;
9572 }
9573 else
9574 {
9575 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - disassembly failure %Rrc\n",
9576 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9577 break;
9578 }
9579 }
9580 }
9581
9582 /*
9583 * Then the native code:
9584 */
9585 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9586 while (offNative < cNative)
9587 {
9588 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9589#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9590 uint32_t cbInstr = sizeof(paNative[0]);
9591 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9592 if (RT_SUCCESS(rc))
9593 {
9594# if defined(RT_ARCH_AMD64)
9595 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9596 {
9597 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9598 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9599 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9600 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9601 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9602 uInfo & 0x8000 ? "recompiled" : "todo");
9603 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9604 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9605 else
9606 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9607 }
9608 else
9609# endif
9610 {
9611# ifdef RT_ARCH_AMD64
9612 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9613 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9614 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9615 iemNativeDisasmGetSymbolCb, &SymCtx);
9616# elif defined(RT_ARCH_ARM64)
9617 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9618 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9619 iemNativeDisasmGetSymbolCb, &SymCtx);
9620# else
9621# error "Port me"
9622# endif
9623 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9624 }
9625 }
9626 else
9627 {
9628# if defined(RT_ARCH_AMD64)
9629 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9630 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9631# else
9632 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9633# endif
9634 cbInstr = sizeof(paNative[0]);
9635 }
9636 offNative += cbInstr / sizeof(paNative[0]);
9637
9638#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9639 cs_insn *pInstr;
9640 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9641 (uintptr_t)pNativeCur, 1, &pInstr);
9642 if (cInstrs > 0)
9643 {
9644 Assert(cInstrs == 1);
9645 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9646 size_t const cchOp = strlen(pInstr->op_str);
9647# if defined(RT_ARCH_AMD64)
9648 if (pszAnnotation)
9649 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9650 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9651 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9652 else
9653 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9654 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9655
9656# else
9657 if (pszAnnotation)
9658 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9659 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9660 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9661 else
9662 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9663 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9664# endif
9665 offNative += pInstr->size / sizeof(*pNativeCur);
9666 cs_free(pInstr, cInstrs);
9667 }
9668 else
9669 {
9670# if defined(RT_ARCH_AMD64)
9671 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9672 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9673# else
9674 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9675# endif
9676 offNative++;
9677 }
9678#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9679 }
9680 }
9681
9682#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9683 /* Cleanup. */
9684 cs_close(&hDisasm);
9685#endif
9686}
9687
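/*
 * Illustrative sketch (not from the original sources): the one-instruction-at-a-time
 * capstone pattern used by the disassembly loops above, reduced to a standalone program.
 * It assumes a host with the capstone library installed; everything else is made up for
 * the illustration. The block is disabled so it does not affect the build.
 */
#if 0
# include <capstone/capstone.h>
# include <stdint.h>
# include <stdio.h>

int main(void)
{
    /* x86-64 bytes: 48 89 c8 = mov rax, rcx; 90 = nop; c3 = ret */
    static const uint8_t s_abCode[] = { 0x48, 0x89, 0xc8, 0x90, 0xc3 };

    csh hDisasm;
    if (cs_open(CS_ARCH_X86, CS_MODE_64, &hDisasm) != CS_ERR_OK)
        return 1;

    size_t   offCode = 0;
    uint64_t uAddr   = 0x1000; /* arbitrary load address, for display only */
    while (offCode < sizeof(s_abCode))
    {
        cs_insn *pInstr;
        size_t   cInstrs = cs_disasm(hDisasm, &s_abCode[offCode], sizeof(s_abCode) - offCode,
                                     uAddr + offCode, 1 /*count*/, &pInstr);
        if (cInstrs != 1)
        {
            printf("%#llx: disassembly failure %d\n", (unsigned long long)(uAddr + offCode), cs_errno(hDisasm));
            break;
        }
        printf("%#llx: %s %s\n", (unsigned long long)pInstr->address, pInstr->mnemonic, pInstr->op_str);
        offCode += pInstr->size;
        cs_free(pInstr, cInstrs);
    }

    cs_close(&hDisasm);
    return 0;
}
#endif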
9688
9689/** Emit alignment padding between labels / functions. */
9690DECL_INLINE_THROW(uint32_t)
9691iemNativeRecompileEmitAlignmentPadding(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fAlignMask)
9692{
9693 if (off & fAlignMask)
9694 {
9695 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, fAlignMask + 1);
9696 while (off & fAlignMask)
9697#if defined(RT_ARCH_AMD64)
9698 pCodeBuf[off++] = 0xcc;
9699#elif defined(RT_ARCH_ARM64)
9700 pCodeBuf[off++] = Armv8A64MkInstrBrk(0xcccc);
9701#else
9702# error "port me"
9703#endif
9704 }
9705 return off;
9706}
9707
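/*
 * Illustrative sketch (not from the original sources): the alignment-mask arithmetic used
 * by iemNativeRecompileEmitAlignmentPadding above, as a standalone program. The helper
 * name is made up; on AMD64 the unit is one byte (0xcc filler) and on ARM64 one 32-bit
 * instruction (brk filler), so a mask of 15 resp. 7 yields 16 resp. 32 byte alignment.
 * The block is disabled so it does not affect the build.
 */
#if 0
# include <stdint.h>
# include <stdio.h>

/* Returns how many filler units are needed so that (off + cFillers) & fAlignMask == 0. */
static uint32_t exampleCountFillers(uint32_t off, uint32_t fAlignMask)
{
    return (fAlignMask + 1 - (off & fAlignMask)) & fAlignMask;
}

int main(void)
{
    printf("%u\n", exampleCountFillers(13, 15));     /* AMD64: 3 bytes of 0xcc to reach offset 16. */
    printf("%u\n", exampleCountFillers(6,  31 / 4)); /* ARM64: 2 instructions to reach a 32 byte boundary. */
    printf("%u\n", exampleCountFillers(16, 15));     /* Already aligned: 0 fillers. */
    return 0;
}
#endif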
9708
9709/**
9710 * Called when a new chunk is allocated, to emit the common per-chunk code.
9711 *
9712 * Allocates a per-chunk context directly from the chunk itself and places the
9713 * common code there.
9714 *
9715 * @returns VBox status code.
9716 * @param pVCpu The cross context virtual CPU structure of the calling
9717 * thread.
9718 * @param idxChunk The index of the chunk being added and requiring a
9719 * common code context.
9720 * @param ppCtx Where to return the pointer to the chunk context start.
9721 */
9722DECLHIDDEN(int) iemNativeRecompileAttachExecMemChunkCtx(PVMCPU pVCpu, uint32_t idxChunk, PCIEMNATIVEPERCHUNKCTX *ppCtx)
9723{
9724 *ppCtx = NULL;
9725
9726 /*
9727 * Allocate a new recompiler state (since we're likely to be called while
9728 * the default one is fully loaded already with a recompiled TB).
9729 *
9730 * This is a bit of overkill, but this isn't a frequently used code path.
9731 */
9732 PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, NULL);
9733 AssertReturn(pReNative, VERR_NO_MEMORY);
9734
9735#if defined(RT_ARCH_AMD64)
9736 uint32_t const fAlignMask = 15; /* 16 byte alignment; off counts bytes on AMD64. */
9737#elif defined(RT_ARCH_ARM64)
9738 uint32_t const fAlignMask = 31 / 4; /* 32 byte alignment; off counts 4 byte instructions on ARM64. */
9739#else
9740# error "port me"
9741#endif
9742 uint32_t aoffLabels[kIemNativeLabelType_LastTbExit + 1] = {0};
9743 int rc = VINF_SUCCESS;
9744 uint32_t off = 0;
9745
9746 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9747 {
9748 /*
9749 * Emit the epilog code.
9750 */
9751 aoffLabels[kIemNativeLabelType_ReturnSuccess] = off;
9752 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
9753 uint32_t const offReturnWithStatus = off;
9754 off = iemNativeEmitCoreEpilog(pReNative, off);
9755
9756 /*
9757 * Generate special jump labels. All of these get a copy of the epilog code.
9758 */
9759 static struct
9760 {
9761 IEMNATIVELABELTYPE enmExitReason;
9762 uint32_t (*pfnEmitCore)(PIEMRECOMPILERSTATE pReNative, uint32_t off);
9763 } const s_aSpecialWithEpilogs[] =
9764 {
9765 { kIemNativeLabelType_NonZeroRetOrPassUp, iemNativeEmitCoreRcFiddling },
9766 { kIemNativeLabelType_ReturnBreak, iemNativeEmitCoreReturnBreak },
9767 { kIemNativeLabelType_ReturnBreakFF, iemNativeEmitCoreReturnBreakFF },
9768 { kIemNativeLabelType_ReturnWithFlags, iemNativeEmitCoreReturnWithFlags },
9769 };
9770 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSpecialWithEpilogs); i++)
9771 {
9772 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9773 Assert(aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] == 0);
9774 aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] = off;
9775 off = s_aSpecialWithEpilogs[i].pfnEmitCore(pReNative, off);
9776 off = iemNativeEmitCoreEpilog(pReNative, off);
9777 }
9778
9779 /*
9780 * Do what iemNativeEmitReturnBreakViaLookup does.
9781 */
9782 static struct
9783 {
9784 IEMNATIVELABELTYPE enmExitReason;
9785 uintptr_t pfnHelper;
9786 } const s_aViaLookup[] =
9787 {
9788 { kIemNativeLabelType_ReturnBreakViaLookup,
9789 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/> },
9790 { kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
9791 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/> },
9792 { kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
9793 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/> },
9794 { kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
9795 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/> },
9796 };
9797 uint32_t const offReturnBreak = aoffLabels[kIemNativeLabelType_ReturnBreak]; Assert(offReturnBreak != 0);
9798 for (uint32_t i = 0; i < RT_ELEMENTS(s_aViaLookup); i++)
9799 {
9800 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9801 Assert(aoffLabels[s_aViaLookup[i].enmExitReason] == 0);
9802 aoffLabels[s_aViaLookup[i].enmExitReason] = off;
9803 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, s_aViaLookup[i].pfnHelper);
9804 }
9805
9806 /*
9807 * Generate simple TB tail labels that just call a helper with a pVCpu
9808 * arg and either return or longjmp/throw a non-zero status.
9809 */
9810 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9811 static struct
9812 {
9813 IEMNATIVELABELTYPE enmExitReason;
9814 bool fWithEpilog;
9815 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9816 } const s_aSimpleTailLabels[] =
9817 {
9818 { kIemNativeLabelType_RaiseDe, false, iemNativeHlpExecRaiseDe },
9819 { kIemNativeLabelType_RaiseUd, false, iemNativeHlpExecRaiseUd },
9820 { kIemNativeLabelType_RaiseSseRelated, false, iemNativeHlpExecRaiseSseRelated },
9821 { kIemNativeLabelType_RaiseAvxRelated, false, iemNativeHlpExecRaiseAvxRelated },
9822 { kIemNativeLabelType_RaiseSseAvxFpRelated, false, iemNativeHlpExecRaiseSseAvxFpRelated },
9823 { kIemNativeLabelType_RaiseNm, false, iemNativeHlpExecRaiseNm },
9824 { kIemNativeLabelType_RaiseGp0, false, iemNativeHlpExecRaiseGp0 },
9825 { kIemNativeLabelType_RaiseMf, false, iemNativeHlpExecRaiseMf },
9826 { kIemNativeLabelType_RaiseXf, false, iemNativeHlpExecRaiseXf },
9827 { kIemNativeLabelType_ObsoleteTb, true, iemNativeHlpObsoleteTb },
9828 { kIemNativeLabelType_NeedCsLimChecking, true, iemNativeHlpNeedCsLimChecking },
9829 { kIemNativeLabelType_CheckBranchMiss, true, iemNativeHlpCheckBranchMiss },
9830 };
9831 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSimpleTailLabels); i++)
9832 {
9833 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9834 Assert(!aoffLabels[s_aSimpleTailLabels[i].enmExitReason]);
9835 aoffLabels[s_aSimpleTailLabels[i].enmExitReason] = off;
9836
9837 /* int pfnCallback(PVMCPUCC pVCpu) */
9838 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9839 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)s_aSimpleTailLabels[i].pfnCallback);
9840
9841 /* If the callback is supposed to return with a status code we inline the epilog
9842 sequence for better speed. Otherwise, if the callback shouldn't return because
9843 it throws/longjmps, we just jump to the return sequence to be on the safe side. */
9844 if (s_aSimpleTailLabels[i].fWithEpilog)
9845 off = iemNativeEmitCoreEpilog(pReNative, off);
9846 else
9847 {
9848#ifdef VBOX_STRICT
9849 off = iemNativeEmitBrk(pReNative, off, 0x2201);
9850#endif
9851 off = iemNativeEmitJmpToFixed(pReNative, off, offReturnWithStatus);
9852 }
9853 }
9854
9855
9856#ifdef VBOX_STRICT
9857 /* Make sure we've generated code for all labels. */
9858 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(aoffLabels); i++)
9859 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
9860#endif
9861 }
9862 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9863 {
9864 Log(("iemNativeRecompileAttachExecMemChunkCtx: Caught %Rrc while recompiling!\n", rc));
9865 iemNativeTerm(pReNative);
9866 return rc;
9867 }
9868 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9869
9870 /*
9871 * Allocate memory for the context (first) and the common code (last).
9872 */
9873 PIEMNATIVEPERCHUNKCTX pCtx;
9874 uint32_t const cbCtx = RT_ALIGN_32(sizeof(*pCtx), 64);
9875 uint32_t const cbCode = off * sizeof(IEMNATIVEINSTR);
9876 PIEMNATIVEINSTR paFinalCommonCodeRx = NULL;
9877 pCtx = (PIEMNATIVEPERCHUNKCTX)iemExecMemAllocatorAllocFromChunk(pVCpu, idxChunk, cbCtx + cbCode, &paFinalCommonCodeRx);
9878 AssertLogRelMsgReturnStmt(pCtx, ("cbCtx=%#x cbCode=%#x idxChunk=%#x\n", cbCtx, cbCode, idxChunk),
9879 iemNativeTerm(pReNative), VERR_OUT_OF_RESOURCES);
9880
9881 /*
9882 * Copy over the generated code.
9883 * There should be no fixups or labels defined here.
9884 */
9885 paFinalCommonCodeRx = (PIEMNATIVEINSTR)((uintptr_t)paFinalCommonCodeRx + cbCtx);
9886 memcpy((PIEMNATIVEINSTR)((uintptr_t)pCtx + cbCtx), pReNative->pInstrBuf, cbCode);
9887
9888 Assert(pReNative->cFixups == 0);
9889 Assert(pReNative->cLabels == 0);
9890
9891 /*
9892 * Initialize the context.
9893 */
9894 AssertCompile(kIemNativeLabelType_Invalid == 0);
9895 AssertCompile(RT_ELEMENTS(pCtx->apExitLabels) == RT_ELEMENTS(aoffLabels));
9896 pCtx->apExitLabels[kIemNativeLabelType_Invalid] = 0;
9897 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(pCtx->apExitLabels); i++)
9898 {
9899 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
9900 pCtx->apExitLabels[i] = &paFinalCommonCodeRx[aoffLabels[i]];
9901 Log10((" apExitLabels[%u]=%p %s\n", i, pCtx->apExitLabels[i], iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true)));
9902 }
9903
9904 iemExecMemAllocatorReadyForUse(pVCpu, pCtx, cbCtx + cbCode);
9905
9906 iemNativeTerm(pReNative);
9907 *ppCtx = pCtx;
9908 return VINF_SUCCESS;
9909}
9910
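/*
 * Illustrative sketch (not from the original sources): the memory layout produced by
 * iemNativeRecompileAttachExecMemChunkCtx above -- a context structure padded up to
 * 64 bytes at the start of the allocation, the common code immediately after it, and
 * the exit label table pointing into that code at the recorded offsets. All names are
 * made up for the illustration; the block is disabled so it does not affect the build.
 */
#if 0
# include <stdint.h>
# include <stdio.h>
# include <stdlib.h>
# include <string.h>

# define EXAMPLE_LABEL_COUNT 4
typedef struct EXAMPLECTX
{
    void const *apExitLabels[EXAMPLE_LABEL_COUNT];
} EXAMPLECTX;

int main(void)
{
    uint32_t const aoffLabels[EXAMPLE_LABEL_COUNT] = { 0, 32, 64, 96 }; /* label offsets into the common code */
    uint8_t const  abCommonCode[128]               = { 0xcc };          /* stand-in for the generated common code */

    /* Pad the context to 64 bytes so the code following it starts cache-line aligned. */
    uint32_t const cbCtx  = (uint32_t)((sizeof(EXAMPLECTX) + 63) & ~(size_t)63);
    uint32_t const cbCode = (uint32_t)sizeof(abCommonCode);

    uint8_t *pbChunk = (uint8_t *)malloc(cbCtx + cbCode);               /* stand-in for the chunk allocator */
    if (!pbChunk)
        return 1;
    EXAMPLECTX *pCtx   = (EXAMPLECTX *)pbChunk;                         /* the context lives at the start... */
    uint8_t    *pbCode = pbChunk + cbCtx;                               /* ...the common code right after it. */
    memcpy(pbCode, abCommonCode, cbCode);

    for (unsigned i = 0; i < EXAMPLE_LABEL_COUNT; i++)
        pCtx->apExitLabels[i] = &pbCode[aoffLabels[i]];

    printf("ctx=%p code=%p exit-label[1]=%p\n", (void *)pCtx, (void *)pbCode, pCtx->apExitLabels[1]);
    free(pbChunk);
    return 0;
}
#endif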
9911
9912/**
9913 * Recompiles the given threaded TB into a native one.
9914 *
9915 * In case of failure the translation block will be returned as-is.
9916 *
9917 * @returns pTb.
9918 * @param pVCpu The cross context virtual CPU structure of the calling
9919 * thread.
9920 * @param pTb The threaded translation block to recompile to native.
9921 */
9922DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9923{
9924#if 0 /* For profiling the native recompiler code. */
9925l_profile_again:
9926#endif
9927 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9928
9929 /*
9930 * The first time through, we allocate the recompiler state and save it;
9931 * all the other times we'll just reuse the saved one after a quick reset.
9932 */
9933 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9934 if (RT_LIKELY(pReNative))
9935 iemNativeReInit(pReNative, pTb);
9936 else
9937 {
9938 pReNative = iemNativeInit(pVCpu, pTb);
9939 AssertReturn(pReNative, pTb);
9940 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative; /* save it */
9941 }
9942
9943#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9944 /*
9945 * First do liveness analysis. This is done backwards.
9946 */
9947 {
9948 uint32_t idxCall = pTb->Thrd.cCalls;
9949 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9950 { /* likely */ }
9951 else
9952 {
9953 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9954 while (idxCall > cAlloc)
9955 cAlloc *= 2;
9956 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9957 AssertReturn(pvNew, pTb);
9958 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9959 pReNative->cLivenessEntriesAlloc = cAlloc;
9960 }
9961 AssertReturn(idxCall > 0, pTb);
9962 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9963
9964 /* The initial (final) entry. */
9965 idxCall--;
9966 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9967
9968 /* Loop backwards through the calls and fill in the other entries. */
9969 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9970 while (idxCall > 0)
9971 {
9972 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9973 Assert(pfnLiveness);
9974 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9975 pCallEntry--;
9976 idxCall--;
9977 }
9978 }
9979#endif
9980
9981 /*
9982 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9983 * so that we can abort if an error happens.
9984 */
9985 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9986#ifdef LOG_ENABLED
9987 uint32_t const cCallsOrg = cCallsLeft;
9988#endif
9989 uint32_t off = 0;
9990 int rc = VINF_SUCCESS;
9991 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9992 {
9993 /*
9994 * Convert the calls to native code.
9995 */
9996#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9997 int32_t iGstInstr = -1;
9998#endif
9999#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10000 uint32_t cThreadedCalls = 0;
10001 uint32_t cRecompiledCalls = 0;
10002#endif
10003#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10004 uint32_t idxCurCall = 0;
10005#endif
10006 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10007 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10008 while (cCallsLeft-- > 0)
10009 {
10010 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10011#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10012 pReNative->idxCurCall = idxCurCall;
10013#endif
10014
10015#ifdef IEM_WITH_INTRA_TB_JUMPS
10016 /*
10017 * Define a label for jump targets (currently only the first entry).
10018 */
10019 if (!(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET))
10020 { /* likely */ }
10021 else
10022 {
10023 iemNativeLabelCreate(pReNative, kIemNativeLabelType_LoopJumpTarget, off);
10024 Assert(idxCurCall == 0); /** @todo when jumping elsewhere, we have to save the register state. */
10025 }
10026#endif
10027
10028 /*
10029 * Debug info, assembly markup and statistics.
10030 */
10031#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10032 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10033 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10034#endif
10035#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10036 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10037 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10038 {
10039 if (iGstInstr < (int32_t)pTb->cInstructions)
10040 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10041 else
10042 Assert(iGstInstr == pTb->cInstructions);
10043 iGstInstr = pCallEntry->idxInstr;
10044 }
10045 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10046#endif
10047#if defined(VBOX_STRICT)
10048 off = iemNativeEmitMarker(pReNative, off,
10049 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
10050#endif
10051#if defined(VBOX_STRICT)
10052 iemNativeRegAssertSanity(pReNative);
10053#endif
10054#ifdef VBOX_WITH_STATISTICS
10055 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10056#endif
10057
10058#if 0
10059 if ( pTb->GCPhysPc == 0x00000000000c1240
10060 && idxCurCall == 67)
10061 off = iemNativeEmitBrk(pReNative, off, 0xf000);
10062#endif
10063
10064 /*
10065 * Actual work.
10066 */
10067 Log2(("%u[%u]: %s%s (off=%#x)\n", idxCurCall, pCallEntry->idxInstr,
10068 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)", off));
10069 if (pfnRecom) /** @todo stats on this. */
10070 {
10071 off = pfnRecom(pReNative, off, pCallEntry);
10072 STAM_REL_STATS({cRecompiledCalls++;});
10073 }
10074 else
10075 {
10076 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10077 STAM_REL_STATS({cThreadedCalls++;});
10078 }
10079 Assert(off <= pReNative->cInstrBufAlloc);
10080 Assert(pReNative->cCondDepth == 0);
10081
10082#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10083 if (LogIs2Enabled())
10084 {
10085 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10086# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10087 static const char s_achState[] = "CUXI";
10088# else
10089 /* 0123 4567 89ab cdef */
10090 /* CCCC CCCC */
10091 /* WWWW WWWW */
10092 /* RR RR RR RR */
10093 /* P P P P P P P P */
10094 static const char s_achState[] = "UxRr" "WwMm" "CcQq" "KkNn";
10095# endif
10096
10097 char szGpr[17];
10098 for (unsigned i = 0; i < 16; i++)
10099 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10100 szGpr[16] = '\0';
10101
10102 char szSegBase[X86_SREG_COUNT + 1];
10103 char szSegLimit[X86_SREG_COUNT + 1];
10104 char szSegAttrib[X86_SREG_COUNT + 1];
10105 char szSegSel[X86_SREG_COUNT + 1];
10106 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10107 {
10108 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10109 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10110 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10111 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10112 }
10113 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10114 = szSegSel[X86_SREG_COUNT] = '\0';
10115
10116 char szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT + 1];
10117 for (unsigned i = 0; i < IEMLIVENESSBIT_IDX_EFL_COUNT; i++)
10118 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10119 szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT] = '\0';
10120
10121 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10122 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10123 }
10124#endif
10125
10126 /*
10127 * Advance.
10128 */
10129 pCallEntry++;
10130#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10131 idxCurCall++;
10132#endif
10133 }
10134
10135 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10136 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10137 if (!cThreadedCalls)
10138 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10139
10140 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, UINT32_MAX);
10141
10142#ifdef VBOX_WITH_STATISTICS
10143 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
10144#endif
10145
10146 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
10147 off = iemNativeRegFlushPendingWrites(pReNative, off);
10148
10149 /*
10150 * Jump to the common per-chunk epilog code.
10151 */
10152 //off = iemNativeEmitBrk(pReNative, off, 0x1227);
10153 off = iemNativeEmitTbExit<kIemNativeLabelType_ReturnSuccess, true, false>(pReNative, off);
10154
10155 /*
10156 * Generate tail labels with jumps to the common per-chunk code on non-x86 hosts.
10157 */
10158#ifndef RT_ARCH_AMD64
10159 Assert(!(pReNative->bmLabelTypes & ( RT_BIT_64(kIemNativeLabelType_ReturnSuccess)
10160 | RT_BIT_64(kIemNativeLabelType_Invalid) )));
10161 AssertCompile(kIemNativeLabelType_Invalid == 0);
10162 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1U) - 2U);
10163 if (fTailLabels)
10164 {
10165 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, kIemNativeLabelType_LastTbExit + 1);
10166 do
10167 {
10168 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10169 fTailLabels &= ~RT_BIT_64(enmLabel);
10170
10171 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10172 AssertContinue(idxLabel != UINT32_MAX);
10173 iemNativeLabelDefine(pReNative, idxLabel, off);
10174
10175 iemNativeAddTbExitFixup(pReNative, off, enmLabel);
10176# ifdef RT_ARCH_ARM64
10177 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
10178# else
10179# error "port me"
10180# endif
10181 } while (fTailLabels);
10182 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10183 }
10184#else
10185 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1) - 1U))); /* Should not be used! */
10186#endif
10187 }
10188 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10189 {
10190 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10191 return pTb;
10192 }
10193 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10194 Assert(off <= pReNative->cInstrBufAlloc);
10195
10196 /*
10197 * Make sure all labels have been defined.
10198 */
10199 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10200#ifdef VBOX_STRICT
10201 uint32_t const cLabels = pReNative->cLabels;
10202 for (uint32_t i = 0; i < cLabels; i++)
10203 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10204#endif
10205
10206#if 0 /* For profiling the native recompiler code. */
10207 if (pTb->Thrd.cCalls >= 136)
10208 {
10209 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10210 goto l_profile_again;
10211 }
10212#endif
10213
10214 /*
10215 * Allocate executable memory, copy over the code we've generated.
10216 */
10217 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10218 if (pTbAllocator->pDelayedFreeHead)
10219 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10220
10221 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
10222 PCIEMNATIVEPERCHUNKCTX pCtx = NULL;
10223 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10224 &paFinalInstrBufRx, &pCtx);
10225
10226 AssertReturn(paFinalInstrBuf, pTb);
10227 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10228
10229 /*
10230 * Apply fixups.
10231 */
10232 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10233 uint32_t const cFixups = pReNative->cFixups;
10234 for (uint32_t i = 0; i < cFixups; i++)
10235 {
10236 Assert(paFixups[i].off < off);
10237 Assert(paFixups[i].idxLabel < cLabels);
10238 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10239 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10240 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10241 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10242 switch (paFixups[i].enmType)
10243 {
10244#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10245 case kIemNativeFixupType_Rel32:
10246 Assert(paFixups[i].off + 4 <= off);
10247 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10248 continue;
10249
10250#elif defined(RT_ARCH_ARM64)
10251 case kIemNativeFixupType_RelImm26At0:
10252 {
10253 Assert(paFixups[i].off < off);
10254 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10255 Assert(offDisp >= -33554432 && offDisp < 33554432);
10256 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10257 continue;
10258 }
10259
10260 case kIemNativeFixupType_RelImm19At5:
10261 {
10262 Assert(paFixups[i].off < off);
10263 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10264 Assert(offDisp >= -262144 && offDisp < 262144);
10265 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10266 continue;
10267 }
10268
10269 case kIemNativeFixupType_RelImm14At5:
10270 {
10271 Assert(paFixups[i].off < off);
10272 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10273 Assert(offDisp >= -8192 && offDisp < 8192);
10274 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10275 continue;
10276 }
10277
10278#endif
10279 case kIemNativeFixupType_Invalid:
10280 case kIemNativeFixupType_End:
10281 break;
10282 }
10283 AssertFailed();
10284 }
10285
10286 /*
10287 * Apply TB exit fixups.
10288 */
10289 PIEMNATIVEEXITFIXUP const paTbExitFixups = pReNative->paTbExitFixups;
10290 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
10291 for (uint32_t i = 0; i < cTbExitFixups; i++)
10292 {
10293 Assert(paTbExitFixups[i].off < off);
10294 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(paTbExitFixups[i].enmExitReason));
10295 RTPTRUNION const Ptr = { &paFinalInstrBuf[paTbExitFixups[i].off] };
10296
10297#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10298 Assert(paTbExitFixups[i].off + 4 <= off);
10299 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off + 4];
10300 Assert(offDisp >= INT32_MIN && offDisp <= INT32_MAX);
10301 *Ptr.pi32 = (int32_t)offDisp;
10302
10303#elif defined(RT_ARCH_ARM64)
10304 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off];
10305 Assert(offDisp >= -33554432 && offDisp < 33554432);
10306 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10307
10308#else
10309# error "Port me!"
10310#endif
10311 }
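    /* Illustrative sketch (not from the original sources) of the two displacement conventions
       patched in by the fixup loops above: an AMD64 rel32 is counted from the byte following
       the 4-byte immediate field, while an ARM64 B imm26 is counted in 4-byte instruction
       units from the branch instruction itself. The values are made up and the block is
       disabled, so it has no effect on the build. */
#if 0
    {
        /* AMD64: rel32 field starts at byte offset 0x11, target at byte offset 0x40. */
        uint32_t const offRel32FieldEx = 0x11;
        uint32_t const offByteTargetEx = 0x40;
        int32_t  const cbRel32Ex       = (int32_t)offByteTargetEx - (int32_t)(offRel32FieldEx + 4); /* = 0x2b */

        /* ARM64: B instruction at instruction index 4, target at instruction index 16. */
        uint32_t const idxBranchInsnEx = 4;
        uint32_t const idxInsnTargetEx = 16;
        int32_t  const cInsnDispEx     = (int32_t)idxInsnTargetEx - (int32_t)idxBranchInsnEx;        /* = 12 */
        uint32_t const uBranchInsnEx   = UINT32_C(0x14000000)                            /* B opcode in bits 31:26 */
                                       | ((uint32_t)cInsnDispEx & UINT32_C(0x03ffffff)); /* imm26 in bits 25:0 */
        RT_NOREF(cbRel32Ex, uBranchInsnEx);
    }
#endif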
10312
10313 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
10314 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10315
10316 /*
10317 * Convert the translation block.
10318 */
10319 RTMemFree(pTb->Thrd.paCalls);
10320 pTb->Native.paInstructions = paFinalInstrBufRx;
10321 pTb->Native.cInstructions = off;
10322 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10323#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10324 pReNative->pDbgInfo->FlatPc = pTb->FlatPc;
10325 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10326 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10327#endif
10328
10329 Assert(pTbAllocator->cThreadedTbs > 0);
10330 pTbAllocator->cThreadedTbs -= 1;
10331 pTbAllocator->cNativeTbs += 1;
10332 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10333
10334#ifdef LOG_ENABLED
10335 /*
10336 * Disassemble to the log if enabled.
10337 */
10338 if (LogIs3Enabled())
10339 {
10340 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10341 iemNativeDisassembleTb(pVCpu, pTb, DBGFR3InfoLogHlp());
10342# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10343 RTLogFlush(NULL);
10344# endif
10345 }
10346#endif
10347 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10348
10349 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10350 return pTb;
10351}
10352
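/*
 * Illustrative sketch (not from the original sources): how the 32-bit marker word emitted
 * under VBOX_STRICT in iemNativeRecompile is packed, and how the disassembler earlier in
 * this file unpacks it -- the low 15 bits carry the call index, bit 15 the "recompiled"
 * flag, and the high word the threaded function number. All names are made up for the
 * illustration; the block is disabled so it does not affect the build.
 */
#if 0
# include <stdint.h>
# include <stdio.h>

static uint32_t examplePackMarker(uint16_t idxCall, int fRecompiled, uint16_t enmFunction)
{
    return (uint32_t)(idxCall | (fRecompiled ? 0x8000 : 0)) | ((uint32_t)enmFunction << 16);
}

int main(void)
{
    uint32_t const uInfo = examplePackMarker(42, 1 /*recompiled*/, 317 /*hypothetical function number*/);
    printf("call #%u to threaded function %u - %s\n",
           uInfo & 0x7fff, uInfo >> 16, uInfo & 0x8000 ? "recompiled" : "todo");
    return 0;
}
#endif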